Contents

1 Introduction

In this vignette we present the complete analysis of the 2021 Drug-Cytokine Combinatorial Screen project and source code for the paper:


Mapping drug-microenvironment-genetic interplay in CLL reveals trisomy 12 as a modulator of microenvironmental signals

Peter-Martin Bruch*, Holly A. R. Giles*, Carolin Kolb, Sophie A. Herbst, Tina Becirovic, Tobias Roider, Junyan Lu, Sebastian Scheinost, Lena Wagner, Jennifer Huellein, Ivan Berest, Mark Kriegsmann, Katharina Kriegsmann, Christiane Zgorzelski, Peter Dreger, Judith B. Zaugg, Carsten Müller-Tidow, Thorsten Zenz, Wolfgang Huber*, Sascha Dietrich*

The presented analysis was performed by Holly Giles and Peter-Martin Bruch, building on scripts from Sascha Dietrich, Wolfgang Huber, Junyan Lu, Malgorzata Oles, Frederik Ziebell, Sophie Herbst and Ivan Berest. This vignette was assembled by Holly Giles.

This vignette produces all seven figures of the paper. The vignette builds from the sub-vignettes, which are separated by horizontal lines. Each subvignette can also be built separately.

#load all libraries
library(plyr)
library(Hmisc)
library(gridExtra)
library(ggplot2)
library(magick)
library(patchwork)
library(cowplot)
library(RColorBrewer)
library(ggrepel)
library(ggbeeswarm)
library(magrittr)
library(pheatmap)
library(ggfortify)
library(DESeq2)
library(survival)
library(survminer)
library(glmnet)
library(ConsensusClusterPlus)
library(clusterProfiler)
library(org.Hs.eg.db)
library(msigdbr)
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
library(ChIPseeker)
library(genomation)
library(ggpubr)
library(gtable)
library(scales)
library(broom)
library(formattable)
library(plyr)
library(png)
library(cowplot)
library(data.table)
library(maxstat)
library(rlang)
library(tidyr)
library(tidyverse)
library(dplyr)

In this sub-vignette we present the analysis and source code for figure 1. This sub-vignette can be built along with all other sub-vignettes by running CLLCytokineScreen2021.Rmd.

2 Set up

Load libraries

library(gridExtra)
library(ggplot2)
library(magick)
library(patchwork)
library(cowplot)
library(dplyr)
library(tidyr)
library(tidyverse)

Set plot directory

plotDir = ifelse(exists(".standalone"), "", "../../inst/figs/")
if(plotDir!="") if(!file.exists(plotDir)) dir.create(plotDir)

3 Load data

#drugs: dataframe containing meta data on all drugs used in screen 
load( "../../data/drugs.RData")

#cytokines: dataframe containing meta data on all stimuli used in screen 
load( "../../data/cytokines.RData")

#df: tibblecontaining all screening viability data
load( "../../data/df.RData")
#from tsvs
df <- read.table(file= "../../inst/extdata/df.txt",header = TRUE) %>% as_tibble()
drugs <- read.delim("../../inst/extdata/Drugs.txt")
cytokines <- read.delim("../../inst/extdata/Cytokines.txt")

df$Cytokine <- factor(df$Cytokine, levels= c("No Cytokine",
                                              "Resiquimod", 
                                              "IL-4", 
                                              "TGF-b1",
                                              "IL-1b",
                                              "Interferon gamma",
                                              "SDF-1a",
                                              "sCD40L" ,
                                              "sCD40L+IL-4",
                                              "soluble anti-IgM", 
                                              "CpG ODN",
                                              "IL-6",
                                              "IL-10",
                                              "IL-21",
                                              "HS-5 CM", 
                                              "IL-15",
                                              "BAFF",
                                              "IL-2" ))

df$Drug <- factor(df$Drug, levels =c("DMSO",
                                     "BAY-11-7085",
                                     "Everolimus",
                                     "Fludarabine",
                                     "I-BET 762",
                                     "Ibrutinib","Idelalisib",
                                     "Luminespib",
                                     "Nutlin-3a",
                                     "PRT062607",
                                     "Pyridone-6",
                                     "Ralimetinib",
                                     "Selumetinib"))

4 Define Aesthetics

source("../../R/themes_colors.R")

5 Plot figures

5.1 Fig 1A

Plot graphic of screen overview

#plot pdf file
Fig1A  <- magick::image_read_pdf("../../inst/images/Fig1A.pdf", pages = 1, density = 100)

Fig1A <- ggdraw() + draw_image(Fig1A, 
                               scale = 1)

Fig1A

5.2 Fig 1B

Overview of cytokine pathways

#plot pdf file
Fig1B  <- magick::image_read_pdf("../../inst/images/Fig1B.pdf", pages = 1, density = 100)

Fig1B <- ggdraw() + draw_image(Fig1B, 
                               scale = 1)
Fig1B

5.3 Fig 1C

Overview of drug pathways and approval status

#set table theme
ttable <- ttheme_minimal(
  core=list(fg_params=list(col = darkergrey,fontface=3)),
  
  colhead=list(fg_params=list(col="black", fontface=4L),
               bg_params = list(fill= colors[1])))
#make table
Fig1C<-
  drugs %>% 
  dplyr::select(`Name`, `main_targets`, `target_category`) %>%
  dplyr::rename(Drug=`Name`, Target=`main_targets`, Category=`target_category`) %>% 
  dplyr::arrange(Category) %>% 
  gridExtra::tableGrob(theme = ttable, rows=NULL) 


wrap_elements(Fig1C)

5.4 Fig 1D

Forrest plot of top 8 drug - drug Pearson Correlation Coefficients
Generate drug-only viability matrix

#Select high conc drug data and generate drug-only viability matrix
Drug_mat <- 
  df %>%
  dplyr::filter(Drug != "DMSO", Drug_Concentration == "High", Cytokine== "No Cytokine") %>%
  
  #Get log(viability) values for all drugs, for each patient         
  dplyr::select(PatientID, Drug, Log) %>% 
  spread(Drug, Log) %>%
  dplyr::select(-"PatientID")%>%
  as.matrix()

Calculate Pearson Coefficients for all drug-drug combinations

# Pearson Coefficients for all drug-drug combinations

#get a list of all drug -drug combinations 
drug.comb <- 
  combn(colnames(Drug_mat), 2) %>% 
  t() %>% 
  as.data.frame()  %>% 
  mutate(comb = paste(V1, V2, sep = ":"))

#set up list of dataframes to store outputs of cor.test, for each drug -drug combination
drug.corr <- vector("list", length = nrow(drug.comb))
#set names as drug - drug combinations
names(drug.corr)  <- drug.comb$comb


#calculate Correlations and confidence intervals for every drug - drug combination
for(drug1 in colnames(Drug_mat)){
  for(drug2 in colnames(Drug_mat)){
    
    #subset drug viability matrix for each drug of interest
    x <- Drug_mat[,drug1]
    y <- Drug_mat[,drug2]
    
    #define drug - drug combination
    comb <-paste(drug1, drug2, sep=":")
    
    #only run cor.test if this combination is not rendundant
    if(comb%in% names(drug.corr)){
    
    #run correlation test
    cor <- cor.test(x, y, conf.level = 0.95, method = c("pearson"))
    
    #convert to table
    cor.df <- unlist(cor) %>% as.data.frame()
    colnames(cor.df) <- comb
    
    #add corr.test output to drug.corr dataframe
    drug.corr[[comb]] <- cor.df
    }
  }
}

#bind list of dataframes into single dataframe
drug.corr <- do.call(cbind, drug.corr)

Arrange data for plotting

#transform dataframe
drug.corr <- 
  drug.corr %>% 
  rownames_to_column("feature") %>% 
  t() %>% 
  as.data.frame()

#tidy up drug.corr dataframe
colnames(drug.corr) <- drug.corr[1,]
drug.corr <- drug.corr[-1,]
drug.corr <- 
  drug.corr %>% 
  rownames_to_column("Combination")

#sort out class types
drug.corr$estimate.cor <- as.numeric(drug.corr$estimate.cor)
drug.corr$conf.int1 <- as.numeric(drug.corr$conf.int1)
drug.corr$conf.int2 <- as.numeric(drug.corr$conf.int2)

#get top 8 correlations with highest R
sig.drug <- 
  drug.corr %>%
  dplyr::filter(estimate.cor<1) %>%
  top_n(8, estimate.cor)

#Make drug combinations readable
sig.drug$Combination <- gsub(':', ' and ', sig.drug$Combination)

#Get order in which to plot
order.drug <- 
  sig.drug %>%
  dplyr::arrange(estimate.cor) %>%
  dplyr::select(Combination) %>%
  unlist()
  
sig.drug$Combination <- factor(sig.drug$Combination, levels=order.drug)
#Plot Forest Plot
Fig1D = 
  ggplot(sig.drug, 
         #PLot 8 drug-drug combinationns, against Pearson Coeffiecnet and show confidence intervals 
         aes(x = Combination, y = estimate.cor, ymin = conf.int1, ymax = conf.int2 )) +
  geom_pointrange(color = drugpal[1])+
  geom_errorbar(aes(ymin=conf.int1, ymax=conf.int2), width=0.5, cex=1, color = drugpal[1])+
  coord_flip()+
  scale_y_continuous(limits=c(-0.1,1), 
                     breaks=c(0,0.25,0.5,0.75,1),
                     labels=c("0","0.25","0.50","0.75","1"))+
  scale_x_discrete(position = "top") + 
  ylab("Pearson Coefficient")+
  xlab("")+
  t2+
  theme(axis.line.y = element_blank())+
  guides(color="none") 

Fig1D

5.5 Fig 1E

Forrest plot of top 8 cytokine - cytokine Pearson Correlation Coefficients
Generate Cytokine only viability matrix

##Select cytokine data and generate viability matrix
Cytokine_mat <- 
  df %>%
  dplyr::filter(Drug =="DMSO", Cytokine !="No Cytokine") %>%
  #Get log(viability) values for all cytokines, for each patient
  dplyr::select(PatientID, Cytokine, Log) %>%
  spread(Cytokine, Log) %>%
  dplyr::select(-"PatientID") %>%
  as.matrix()

Calculate Pearson Coefficients for all cytokine-cytokine combinations

#Pearson Coefficients for all cytokine-cytokine combinations
#get a list of all combinations of stimuli
cyt.comb <- 
  combn(colnames(Cytokine_mat), 2) %>% 
  t() %>% 
  as.data.frame()  %>% 
  mutate(comb = paste(V1, V2, sep = ":"))


#set up list of dataframes to store outputs of cor.test, for each cytokine - cytokine combination
cyt.corr <- vector("list", length = nrow(cyt.comb))

#set names as cytokine - cytokine combinations
names(cyt.corr)  <- cyt.comb$comb


#calculate Correlations and confidence intervals for every combination
for(cyt1 in colnames(Cytokine_mat)){
  for(cyt2 in colnames(Cytokine_mat)){
    
    #subset cytokine viability matrix for each cytokine of interest
    x <- Cytokine_mat[,cyt1]
    y <- Cytokine_mat[,cyt2]
    
    #define combination
    comb <-paste(cyt1, cyt2, sep=":")
    
    #only run if this combinationn is not redundant
    if(comb %in% names(cyt.corr)){
    
    #run correlation test
    cor <- cor.test(x, y, conf.level = 0.95, method = c("pearson"))
    
    #convert to table
    cor.df <- unlist(cor) %>% as.data.frame()
    colnames(cor.df) <- comb
    
    #add to results dataframe
    cyt.corr[[comb]] <- cor.df
    }
   
  }
}



cyt.corr <- do.call(cbind, cyt.corr)

Arrange data for plotting

#Cytokine Forest plot
#transform dataframe
cyt.corr <- 
  cyt.corr %>% 
  rownames_to_column("feature") %>% 
  t() %>% 
  as.data.frame()

#tidy up cyt.corr dataframe
colnames(cyt.corr) <- cyt.corr[1,]
cyt.corr <- cyt.corr[-1,]
cyt.corr <- cyt.corr %>% rownames_to_column("Combination")

#sort out class types
cyt.corr$estimate.cor <- as.numeric(cyt.corr$estimate.cor)
cyt.corr$conf.int1 <- as.numeric(cyt.corr$conf.int1)
cyt.corr$conf.int2 <- as.numeric(cyt.corr$conf.int2)

#get top 8 highest correlations
sig.cyt <- 
  cyt.corr %>%
  dplyr::filter(Combination %in% c("IL-4:sCD40L+IL-4", 
                                   "Resiquimod:CpG ODN",  
                                   "sCD40L:BAFF", 
                                   "IL-1b:soluble anti-IgM", 
                                   "Resiquimod:IL-4", 
                                   "IL-6:IL-4", 
                                   "IL-6:IL-10", 
                                   "Interferon gamma:IL-10")) 
  

#Make cytokine combinations readable
sig.cyt$Combination <- gsub(':', ' and ', sig.cyt$Combination)

#set plotting order
order.cyt <- sig.cyt %>%
            dplyr::arrange(estimate.cor) %>%
            dplyr::select(Combination) %>%
            unlist()
  
sig.cyt$Combination <- factor(sig.cyt$Combination, levels=order.cyt)
#Plot Forest Plot
Fig1E = 
  ggplot(sig.cyt, 
         aes(x = Combination, y = estimate.cor, ymin = conf.int1, ymax = conf.int2 ))+
  geom_pointrange(color = cytpal[1])+
  geom_errorbar(aes(ymin=conf.int1, ymax=conf.int2),width=0.5,cex=1, color = cytpal[1])+
  coord_flip()+
  scale_y_continuous(limits=c(-0.1,1), 
                     breaks=c(0,0.25,0.5,0.75,1),
                     labels=c("0","0.25","0.50","0.75","1"))+
  ylab("Pearson Coefficient")+
  xlab("")+
  scale_x_discrete(position = "top", labels=c("IL-4 and sCD40L+IL-4"="IL4 and sCD40L + IL4","IL-1b and soluble anti-IgM"="IL1\u03B2 and soluble anti-IgM","Resiquimod and IL-4"="Resiquimod and IL4", "IL-6 and IL-10"="IL6 and IL10" ,"Interferon gamma and IL-10"="Interferon \u03B3 and IL10"))+
  t2 +
  theme(axis.line.y = element_blank()) +
  guides(color="none")
  
Fig1E

6 Assemble Figure

tp <- theme(plot.tag=element_text(size = 30, face="plain"))

design1 <-"
  ABBC
  DDEE
"

patchwork_plot <-
  
  Fig1A + tp+
  Fig1B + tp+
  wrap_elements(Fig1C ) + tp+
  wrap_elements(Fig1D) + tp+
  wrap_elements(Fig1E) + tp+
  
  plot_annotation(tag_levels = "A") +
  plot_layout(design = design1, heights = c(0.7,1), width=c(1,0.8,0.8,1))


patchwork_plot

7 Appendix

Sys.info()
##                                                                                             sysname 
##                                                                                            "Darwin" 
##                                                                                             release 
##                                                                                            "19.6.0" 
##                                                                                             version 
## "Darwin Kernel Version 19.6.0: Thu May  6 00:48:39 PDT 2021; root:xnu-6153.141.33~1/RELEASE_X86_64" 
##                                                                                            nodename 
##                                               "mac-huber20.academic.christs.cam.ac.uk.pool.embl.de" 
##                                                                                             machine 
##                                                                                            "x86_64" 
##                                                                                               login 
##                                                                                             "holly" 
##                                                                                                user 
##                                                                                             "holly" 
##                                                                                      effective_user 
##                                                                                             "holly"
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
##  [1] grid      parallel  stats4    stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] forcats_0.5.1                          
##  [2] stringr_1.4.0                          
##  [3] dplyr_1.0.7                            
##  [4] purrr_0.3.4                            
##  [5] readr_1.4.0                            
##  [6] tibble_3.1.2                           
##  [7] tidyverse_1.3.1                        
##  [8] tidyr_1.1.3                            
##  [9] rlang_0.4.11                           
## [10] maxstat_0.7-25                         
## [11] data.table_1.14.0                      
## [12] png_0.1-7                              
## [13] formattable_0.2.1                      
## [14] broom_0.7.8                            
## [15] scales_1.1.1                           
## [16] gtable_0.3.0                           
## [17] genomation_1.24.0                      
## [18] ChIPseeker_1.28.3                      
## [19] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2
## [20] GenomicFeatures_1.44.0                 
## [21] msigdbr_7.4.1                          
## [22] org.Hs.eg.db_3.13.0                    
## [23] AnnotationDbi_1.54.1                   
## [24] clusterProfiler_4.0.0                  
## [25] ConsensusClusterPlus_1.56.0            
## [26] glmnet_4.1-2                           
## [27] Matrix_1.3-4                           
## [28] survminer_0.4.9                        
## [29] ggpubr_0.4.0                           
## [30] DESeq2_1.32.0                          
## [31] SummarizedExperiment_1.22.0            
## [32] Biobase_2.52.0                         
## [33] MatrixGenerics_1.4.0                   
## [34] matrixStats_0.59.0                     
## [35] GenomicRanges_1.44.0                   
## [36] GenomeInfoDb_1.28.1                    
## [37] IRanges_2.26.0                         
## [38] S4Vectors_0.30.0                       
## [39] BiocGenerics_0.38.0                    
## [40] ggfortify_0.4.11                       
## [41] pheatmap_1.0.12                        
## [42] magrittr_2.0.1                         
## [43] ggbeeswarm_0.6.0                       
## [44] ggrepel_0.9.1                          
## [45] RColorBrewer_1.1-2                     
## [46] cowplot_1.1.1                          
## [47] patchwork_1.1.1                        
## [48] magick_2.7.2                           
## [49] gridExtra_2.3                          
## [50] Hmisc_4.5-0                            
## [51] ggplot2_3.3.5                          
## [52] Formula_1.2-4                          
## [53] survival_3.2-11                        
## [54] lattice_0.20-44                        
## [55] plyr_1.8.6                             
## [56] BiocStyle_2.20.2                       
## 
## loaded via a namespace (and not attached):
##   [1] rappdirs_0.3.3           rtracklayer_1.52.0       exactRankTests_0.8-32   
##   [4] bit64_4.0.5              knitr_1.33               DelayedArray_0.18.0     
##   [7] rpart_4.1-15             KEGGREST_1.32.0          RCurl_1.98-1.3          
##  [10] generics_0.1.0           RSQLite_2.2.7            shadowtext_0.0.8        
##  [13] bit_4.0.4                enrichplot_1.12.2        lubridate_1.7.10        
##  [16] xml2_1.3.2               assertthat_0.2.1         viridis_0.6.1           
##  [19] xfun_0.24                hms_1.1.0                babelgene_21.4          
##  [22] evaluate_0.14            fansi_0.5.0              restfulr_0.0.13         
##  [25] progress_1.2.2           caTools_1.18.2           dbplyr_2.1.1            
##  [28] readxl_1.3.1             km.ci_0.5-2              igraph_1.2.6            
##  [31] DBI_1.1.1                geneplotter_1.70.0       htmlwidgets_1.5.3       
##  [34] ellipsis_0.3.2           backports_1.2.1          bookdown_0.22.3         
##  [37] annotate_1.70.0          gridBase_0.4-7           biomaRt_2.48.2          
##  [40] vctrs_0.3.8              abind_1.4-5              cachem_1.0.5            
##  [43] withr_2.4.2              ggforce_0.3.3            BSgenome_1.60.0         
##  [46] checkmate_2.0.0          GenomicAlignments_1.28.0 treeio_1.16.1           
##  [49] prettyunits_1.1.1        cluster_2.1.2            DOSE_3.18.1             
##  [52] ape_5.5                  lazyeval_0.2.2           crayon_1.4.1            
##  [55] genefilter_1.74.0        pkgconfig_2.0.3          tweenr_1.0.2            
##  [58] nlme_3.1-152             vipor_0.4.5              nnet_7.3-16             
##  [61] lifecycle_1.0.0          downloader_0.4           filelock_1.0.2          
##  [64] BiocFileCache_2.0.0      modelr_0.1.8             seqPattern_1.24.0       
##  [67] cellranger_1.1.0         polyclip_1.10-0          aplot_0.0.6             
##  [70] KMsurv_0.1-5             carData_3.0-4            boot_1.3-28             
##  [73] zoo_1.8-9                reprex_2.0.0             base64enc_0.1-3         
##  [76] beeswarm_0.4.0           viridisLite_0.4.0        rjson_0.2.20            
##  [79] bitops_1.0-7             KernSmooth_2.23-20       Biostrings_2.60.1       
##  [82] blob_1.2.1               shape_1.4.6              pdftools_3.0.1          
##  [85] qvalue_2.24.0            qpdf_1.1                 jpeg_0.1-8.1            
##  [88] rstatix_0.7.0            ggsignif_0.6.2           memoise_2.0.0           
##  [91] gplots_3.1.1             zlibbioc_1.38.0          compiler_4.1.0          
##  [94] scatterpie_0.1.6         BiocIO_1.2.0             plotrix_3.8-1           
##  [97] cli_3.0.0                Rsamtools_2.8.0          XVector_0.32.0          
## [100] htmlTable_2.2.1          MASS_7.3-54              tidyselect_1.1.1        
## [103] stringi_1.6.2            highr_0.9                yaml_2.2.1              
## [106] GOSemSim_2.18.0          askpass_1.1              locfit_1.5-9.4          
## [109] latticeExtra_0.6-29      survMisc_0.5.5           fastmatch_1.1-0         
## [112] tools_4.1.0              rio_0.5.27               rstudioapi_0.13         
## [115] foreach_1.5.1            foreign_0.8-81           farver_2.1.0            
## [118] ggraph_2.0.5             digest_0.6.27            rvcheck_0.1.8           
## [121] BiocManager_1.30.16      Rcpp_1.0.6               car_3.0-11              
## [124] httr_1.4.2               colorspace_2.0-2         rvest_1.0.0             
## [127] fs_1.5.0                 XML_3.99-0.6             splines_4.1.0           
## [130] tidytree_0.3.4           graphlayouts_0.7.1       xtable_1.8-4            
## [133] jsonlite_1.7.2           ggtree_3.0.2             tidygraph_1.2.0         
## [136] R6_2.5.0                 pillar_1.6.1             htmltools_0.5.1.1       
## [139] glue_1.4.2               fastmap_1.1.0            BiocParallel_1.26.1     
## [142] codetools_0.2-18         fgsea_1.18.0             mvtnorm_1.1-2           
## [145] utf8_1.2.1               curl_4.3.2               gtools_3.9.2            
## [148] zip_2.2.0                GO.db_3.13.0             openxlsx_4.2.4          
## [151] rmarkdown_2.9            munsell_0.5.0            DO.db_2.9               
## [154] GenomeInfoDbData_1.2.6   iterators_1.0.13         impute_1.66.0           
## [157] haven_2.4.1              reshape2_1.4.4

In this sub-vignette we present the analysis and source code for Figure 2. This sub-vignette can be built along with all others, to generate all figures, by running CLLCytokineScreen2021.Rmd.

8 Set up

Load libraries

library(RColorBrewer)
library(ggrepel)
library(patchwork)
library(ggplot2)
library(tidyr)
library(ggbeeswarm)
library(magrittr)
library(pheatmap)
library(ggfortify)
library(gridExtra)
library(DESeq2)
library(survival)
library(survminer)
library(glmnet)
library(ConsensusClusterPlus)
library(clusterProfiler)
library(org.Hs.eg.db)
library(msigdbr)
library(dplyr)
library(tidyverse)

Set plotting directory

plotDir = ifelse(exists(".standalone"), "", "../../inst/figs/")
if(plotDir!="") if(!file.exists(plotDir)) dir.create(plotDir)

9 Load data

Raw

#df: tibble containing all screening viability data
load( "../../data/df.RData")

#patMeta: tibble containing all patient genetic data
load( "../../data/patMeta.RData")

#LDT: tibble containing Lymhpocyte DOubling Times for patients in screen
load( "../../data/LDT.RData")

#survT: tibble containing disease progression data for patients in screen 
load( "../../data/survT.RData")

#dds_smp: DESeqDataSet containing RNA counts and assosciated meta data for matched samples to those in screen 
load( "../../data/dds_smp.RData")
#From tsvs
#screening data
df <- read.table(file= "../../inst/extdata/df.txt",header = TRUE) %>% as_tibble()

#Patient meta data
patMeta <- read.table(file= "../../inst/extdata/patMeta.txt",header = TRUE) %>% as_tibble()

#Lymphocyte Doubling Times
LDT <- read.table(file= "../../inst/extdata/LDT.txt",header = TRUE) %>% as_tibble()

#Survival times
survT <- read.table(file= "../../inst/extdata/survT.txt",header = TRUE) %>% as_tibble()

#RNA data
rna_countsMatrix <- read.table(file=  "../../inst/extdata/rna_countsMatrix.txt", header = TRUE) 
coldata_rna <- read.table(file="../../inst/extdata/coldata_rna.txt", header= TRUE)
rowdata_rna <- read.table(file=  "../../inst/extdata/rowdata_rna.txt", header= TRUE) 


#Arrange data for analysis
#screening data
df$Cytokine <- factor(df$Cytokine, levels= c("No Cytokine",
                                              "Resiquimod", 
                                              "IL-4", 
                                              "TGF-b1",
                                              "IL-1b",
                                              "Interferon gamma",
                                              "SDF-1a",
                                              "sCD40L" ,
                                              "sCD40L+IL-4",
                                              "soluble anti-IgM", 
                                              "CpG ODN",
                                              "IL-6",
                                              "IL-10",
                                              "IL-21",
                                              "HS-5 CM", 
                                              "IL-15",
                                              "BAFF",
                                              "IL-2" ))


#patMeta
patMeta[sapply(patMeta, is.character)] <- lapply(patMeta[sapply(patMeta, is.character)], as.factor)
patMeta[sapply(patMeta, is.integer)] <- lapply(patMeta[sapply(patMeta, is.integer)], as.factor)
patMeta$PatientID <- as.character(patMeta$PatientID)

#RNA
#assemble deseq object
rownames(coldata_rna) <- coldata_rna$PatientID
dds_smp <- DESeqDataSetFromMatrix(rna_countsMatrix, coldata_rna, design =~1)
rowData(dds_smp) <- rowdata_rna

Process data

# Subset data to Cytokine only treatments, no drugs 

df <- dplyr::filter(df, Drug == "DMSO", Cytokine != "No Cytokine")

10 Define Aesthetics

source("../../R/themes_colors.R")

11 Define additional functions

deckel function: to set limits of scaling factor. Accepts a number x, and two numeric limits, lower and upper.

deckel <- function(x, lower = -Inf, upper = +Inf) ifelse(x<lower, lower, ifelse(x>upper, upper, x))

medianCenter_MadScale function: to scale viability values according to MAD and then center values at zero. Maximum/minimum size of scaling factor set with deckel (above). Accepts a vector x to scale.

medianCenter_MadScale <- function(x) {
  s=0
  (x - s) / deckel(mad(x, center = s), lower = 0.05, upper = 0.2)
}

scaleCytResp function: to apply medianCenter_MadScale row wise to viability matrix. Accepts x, a matrix of log(viability) values.

scaleCytResp  <- function(x) t(apply(x, 1, medianCenter_MadScale)) 

Hierarchical clustering function: To provide cluster function for running ConsensusClusterPlus. Accepts this_dist, a dissimilarity structure as produced by dist and k, the number of clusters to assign.

myclusterfunction = function(this_dist, k){
    #run hierarchical cluster analysis on dissimilarity structure this_dist  
    hc = hclust(this_dist)
    #cut cut tree into k groups 
    assignment = cutree(hc, k)
    return(assignment)
}

Euclidean Distance function: To provide distance function for running ConsensusClusterPlus. Calculates Euclidean distances for matrix x.

myDistFunc = function(x){ dist(x, method="euclidean")}

Factor2Ind: To generate indicator matrix from a factor. Given a factor x, create an indicator matrix of dimension length(x) multiplied by nlevels(x)-1, dropping the column corresponding to the baseline level (by default the first level is used as baseline).

factor2ind <- function(x, baseline)
{

  xname <- deparse(substitute(x))
  n <- length(x)
  x <- as.factor(x)
  if(!missing(baseline)) x <- relevel(x, baseline)
  X <- matrix(0, n, length(levels(x)))
  X[(1:n) + n*(unclass(x)-1)] <- 1
  X[is.na(x),] <- NA
  dimnames(X) <- list(names(x), paste(xname, levels(x), sep = ":"))
  return(X[,-1,drop=FALSE])
}

Function for multinomial regression: To perform multinomial regression to identify genetic features that are predictors of cluster assignment. Provide a feature matrix X and a response matrix y, and specify method (regression method), repeats (number of repeats of the regression) and folds (number of folds to split the data into for cross validation).

runGlm.multi <- 
  function(X, y, method = "ridge", repeats=20, folds = 3) {
  modelList <- list()
  lambdaList <- c()
  
  coefMat <- 
    lapply(unique(y), function(n) {
    mat <- matrix(NA, ncol(X), repeats)
    rownames(mat) <- colnames(X)
    mat
  })
  
  names(coefMat) <- unique(y)
  
  alpha = switch(method, lasso = 1, ridge = 0, stop("Please provide a valid method: lasso or ridge"))

  
  for (i in seq(repeats)) {

      #balanced sampling
      vecFold <- mltools::folds(y, nfolds = folds, stratified = TRUE, seed = i*1996)
      res <- cv.glmnet(X, y, type.measure = "class",
                       foldid = vecFold, alpha = alpha, standardize = FALSE,
                       intercept = TRUE, family = "multinomial")
      lambdaList <- c(lambdaList, res$lambda.min)
      modelList[[i]] <- res
      
      coefModel <- coef(res, s = "lambda.min")
      for (n in names(coefModel)) {
        coefMat[[n]][,i] <- coefModel[[n]][-1]
      }
  }
  list(modelList = modelList, lambdaList = lambdaList, coefMat = coefMat)
  }

Sum coefficients: to drop all features from multinomial regression that don’t meet specified cut off criteria, and gather coefficients for all remaining other features, for all repeats of the regression. Accepts a matrix coefMat, plus numeric values for coefCut, the minimum value of that the average coefficient should be, and freqCut, the minimum proportion of repeats that a coefficient should be significant.

sumCoef <- function(coefMat, coefCut = 0, freqCut =1) {
  meanCoef <- rowMeans(abs(coefMat))
  freqCoef <- rowMeans(coefMat != 0)
  subMat <- coefMat[meanCoef > coefCut & freqCoef >= freqCut,,drop=FALSE]
  eachTab <- data.frame(subMat) %>%
     rownames_to_column("feature") %>% gather(key = "rep",value = "coef",-feature) %>%
     mutate(rep = gsub("X","",rep))
  return(eachTab)
} 

12 Plot Figures

12.1 Figure 2A

Heatmap of all scaled log(viability) values, for all patient samples and stimuli.
In the code below, the viability data is normalised to DMSO controls and each row is scaled according to MAD. Limits are applied to scaling factor for optimal visualisation. The heatmap is plotted using pheatmap: the ordering of the columns (patient samples) is obtained from the dendrogram that results from runninng ConsensusClusterPlus using hierarchical clustering with the Euclidean metric. The rows are globally ordered using the dendrogram order produced by hclust with default branch arrangement.

########### Viability matrix ################
#make viability matrix for cytokine treatments for patients
viab.mat <- dplyr::select(df, 
                          PatientID, 
                          Log, 
                          Cytokine) %>% 
            tidyr::spread(Cytokine, Log) %>%
  as.data.frame()

#make PatientID the row names
rownames(viab.mat) <- unlist(viab.mat[,1]) # the first row will be the header
viab.mat <- dplyr::select(viab.mat,-PatientID) 

#Transform matrix
viab.mat <- t(viab.mat)

#keep unscaled matrix 
viab.mat.unscaled <- viab.mat

#run scaleCytResp on viability matrix
viab.mat <- scaleCytResp(viab.mat)

#apply deckel function to matrix
#Calculate dendrogram 
breaks <- c(seq(-3, 3, length.out = 101)) 

viab.mat.lims <- deckel(viab.mat,
                            lower = dplyr::first(breaks),
                            upper = dplyr::last(breaks))

Run consensus clustering

results = ConsensusClusterPlus(d = viab.mat.lims, 
                               maxK = 7, 
                               reps = 10000, 
                               pItem = 0.8, 
                               pFeature = 1,  
                               clusterAlg = "myclusterfunction",
                               distance = "myDistFunc", 
                               plot = NULL, 
                               seed = 1386, 
                               finalLinkage = "complete", 
                               innerLinkage = "complete")
## end fraction
## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

Get cluster annotations

#Extract consensus clusters for k = 4
clusters <- 
  results[[4]][["consensusClass"]] %>%
  tibble::enframe() %>% 
  dplyr::rename(PatientID="name", Cluster="value") 

#get table of clusters
clusters <- 
  clusters %>% 
  dplyr::select(PatientID, Cluster)

#get matrix
cluster_matrix <- as.dist( 1 - results[[4]]$ml )

#add clusters to patMeta
patMeta_cl <- left_join(clusters, patMeta, by = "PatientID")

Arrange annotations for heatmap

#Sort heatmap annotations
#Select annotations from patMeta_cl
Heatmap_Annotation <- dplyr::select(patMeta_cl, 
                                    PatientID, 
                                    IGHV.status, 
                                    trisomy12,
                                    TP53,
                                    ATM,
                                    treatment, 
                                    gender, 
                                    Cluster) %>%
  
  #Adjust names/levels for legend
  mutate(treatment=case_when(treatment==0~"No", 
                             treatment==1~"Yes")) %>%

  mutate(gender=case_when(gender=="f"~"Female", 
                          gender=="m"~"Male")) %>%
  
  mutate(IGHV.status=case_when(IGHV.status=="U"~"Unmutated", 
                               IGHV.status=="M"~"Mutated"))


#Rename columns for legend
colnames(Heatmap_Annotation) <- c("PatientID", 
                                  "IGHV status",
                                  "trisomy 12", 
                                  "TP53", 
                                  "ATM",
                                  "Pretreated", 
                                  "Sex", 
                                  "Cluster")

#make Pat IDs the rownames
Heatmap_Annotation <- as.data.frame(Heatmap_Annotation)
rownames(Heatmap_Annotation) <- unlist(Heatmap_Annotation$PatientID)

#Tidy Up Heatmap Annotation table
Heatmap_Annotation$Cluster <- as.factor(Heatmap_Annotation$Cluster)

Heatmap_Annotation <- dplyr::select(Heatmap_Annotation,-"PatientID")

Define aesthetics for heatmap and annotations

########### Set Heatmap Aesthetics ###############
#Generate red-blue divergent palette
breaks <- c(seq(-3, 3, length.out = 101)) %>% `names<-`(
     colorRampPalette(c(palblues, 
                       "white",  "white", "white",
                         palreds))(101))

#Specify colors of annotations 
 ann_colors = 
   list(
    "IGHV status" = c("Unmutated"=IGHV[1],
                      "Mutated"=IGHV[2]),
    "Sex" = c("Female"=Sex[1],
              "Male"=Sex[2]),
    "Pretreated" = c("No"=Mutant[1],
                  "Yes"=Mutant[2]),
    "trisomy 12" = c("1"=Mutant[2],
                     "0"=Mutant[1]),
    "ATM" = c("1"=Mutant[2],
              "0"=Mutant[1]),
    "TP53" = c("1"=Mutant[2],
               "0"=Mutant[1]),
    "Cluster" = c("1"=colors[1],
                  "2"=colors[2],
                  "3"=colors[3],
                  "4"=colors[4]))

Plot Figure

#Plot heatmap
Fig2A <- 
pheatmap(viab.mat.lims,  
         scale = "none",
         clustering_method = "complete", 
         cluster_cols = TRUE,
         show_colnames = FALSE,
         cutree_cols = 4,
         clustering_distance_cols = cluster_matrix,
         annotation_col = Heatmap_Annotation,
         annotation_colors = ann_colors,
         cellheight = 22,
         cellwidth = 4,
         breaks = breaks,
         color= names(breaks),
         border_color=NA,
         fontsize=12,
         fontsize_row=16,
         legend_breaks = c(-3,0,3),
         labels_row = c("SDF-1\u03B1","TGF-\u03B21","IL6","IL10","Resiquimod","CpG ODN","IL4","sCD40L + IL4","sCD40L","BAFF","IL1\u03B2","soluble anti-IgM","IL15","IL2","Interferon \u03B3","IL21","HS-5 CM") ) 
        

Fig2A

12.2 Figure 2B

#plot LDT stratified by Patient Clusters
Fig2B <-
  patMeta_cl %>%
  #join patient meta data with clusters, to LDT dataframe
  left_join(  dplyr::select(LDT, PatientID, doubling.time), by=c("PatientID"="PatientID")) %>%
  dplyr::filter(!is.na(doubling.time)) %>%
  dplyr::mutate(Cluster=as.factor(Cluster)) %>%
  
  ggplot(aes(x=Cluster, y=doubling.time,  group=Cluster, fill=Cluster)) +
  geom_boxplot() +
  geom_beeswarm() +
  scale_fill_manual(values=colors[1:4]) +
  scale_y_log10() +
  guides(fill="none") +
  ylab("LDT in years") +
  t2 +
  # stat_compare_means(method="kruskal.test", size=6, label.x = 1.3, label.y=2.1)+
  stat_compare_means(method="t.test", comparison=list(c(1,2),c(3,4)), size=6, label.x = 1.3, label.y=2.1)


Fig2B

# 
# #run t test to compare clusters 3 and 4
# patMeta_cl %>%
#   left_join(  dplyr::select(LDT, PatientID, doubling.time), by=c("PatientID"="PatientID")) %>%
#   dplyr::filter(!is.na(doubling.time)) %>%
#   dplyr::mutate(Cluster=as.factor(Cluster)) %>% 
#   dplyr::filter(Cluster %in% c(3,4)) %>% 
#   mutate(doubling.time=log10(doubling.time)) %>% 
#   
#   t.test(doubling.time~Cluster, . )

12.3 Figure 2C

Here we plot KM curves to show TTT stratified by cluster. Clinical follow-up information to calculate OS was available for all 192 patients. For 188 patients treatment information after sample collection was available.

######Arrange data######
clusters.surv <- left_join(clusters, survT, by = "PatientID")

#Add meta data to heatmap clusters
clusters.surv <- left_join(clusters.surv, patMeta, by="PatientID")

Univariate Cox Proportional Hazards
Cluster 1 as reference

coxph_summ <- clusters.surv%>%
  mutate(Cluster=factor(Cluster))%>%
    coxph(Surv( TTT, treatedAfter) ~  factor2ind(Cluster, 1),  data=.) %>% 
    summary()
coxph_summ$coefficients %>% round(5)
##                                     coef exp(coef) se(coef)        z Pr(>|z|)
## factor2ind(Cluster, 1)Cluster:2  0.47981   1.61577  0.28715  1.67093  0.09474
## factor2ind(Cluster, 1)Cluster:3 -0.24052   0.78622  0.27089 -0.88786  0.37461
## factor2ind(Cluster, 1)Cluster:4 -1.29254   0.27457  0.33893 -3.81361  0.00014
C1vsC2_p <- coxph_summ$coefficients[1,5] %>% round(3)
C1vsC2_p
## [1] 0.095

Cluster 4 as reference

coxph_summ <- clusters.surv%>%
  mutate(Cluster=factor(Cluster))%>%
    coxph(Surv( TTT, treatedAfter) ~  factor2ind(Cluster, 4),  data=.) %>% 
  summary()
coxph_summ$coefficients %>% round(5)
##                                    coef exp(coef) se(coef)       z Pr(>|z|)
## factor2ind(Cluster, 4)Cluster:1 1.29254   3.64203  0.33893 3.81361  0.00014
## factor2ind(Cluster, 4)Cluster:2 1.77235   5.88467  0.38930 4.55263  0.00001
## factor2ind(Cluster, 4)Cluster:3 1.05202   2.86344  0.37578 2.79957  0.00512
C3vsC4_p<-coxph_summ$coefficients[3,5] %>% round(3)
C3vsC4_p
## [1] 0.005

Multivariate Cox Proportional Hazards
Cluster 1 as reference

coxph_summ <- clusters.surv%>%
  mutate(Cluster=factor(Cluster))%>%
    coxph(Surv( TTT, treatedAfter) ~  factor2ind(Cluster, 1)+IGHV.status+trisomy12+TP53,  data=.) %>% 
  summary()
coxph_summ$coefficients
##                                        coef exp(coef)  se(coef)          z
## factor2ind(Cluster, 1)Cluster:2  0.55573674 1.7432248 0.3164764  1.7560134
## factor2ind(Cluster, 1)Cluster:3  0.03979159 1.0405939 0.2981304  0.1334704
## factor2ind(Cluster, 1)Cluster:4 -0.78031774 0.4582604 0.3983280 -1.9589831
## IGHV.statusU                     0.55191581 1.7365768 0.2725341  2.0251255
## trisomy121                      -0.13357407 0.8749627 0.3561725 -0.3750263
## TP531                            1.38977496 4.0139467 0.2607176  5.3305760
##                                     Pr(>|z|)
## factor2ind(Cluster, 1)Cluster:2 7.908611e-02
## factor2ind(Cluster, 1)Cluster:3 8.938214e-01
## factor2ind(Cluster, 1)Cluster:4 5.011477e-02
## IGHV.statusU                    4.285447e-02
## trisomy121                      7.076409e-01
## TP531                           9.790173e-08
C1vsC2_p_mv<-coxph_summ$coefficients[1,5] %>% round(3)
C1vsC2_p_mv
## [1] 0.079

Cluster 4 as reference

coxph_summ <- clusters.surv%>%
  mutate(Cluster=factor(Cluster))%>%
    coxph(Surv( TTT, treatedAfter) ~  factor2ind(Cluster, 4)+IGHV.status+trisomy12+TP53,  data=.) %>%
  summary()
coxph_summ$coefficients
##                                       coef exp(coef)  se(coef)          z
## factor2ind(Cluster, 4)Cluster:1  0.7803177 2.1821655 0.3983280  1.9589831
## factor2ind(Cluster, 4)Cluster:2  1.3360545 3.8040051 0.4618269  2.8929770
## factor2ind(Cluster, 4)Cluster:3  0.8201093 2.2707481 0.3975967  2.0626662
## IGHV.statusU                     0.5519158 1.7365768 0.2725341  2.0251255
## trisomy121                      -0.1335741 0.8749627 0.3561725 -0.3750263
## TP531                            1.3897750 4.0139467 0.2607176  5.3305760
##                                     Pr(>|z|)
## factor2ind(Cluster, 4)Cluster:1 5.011477e-02
## factor2ind(Cluster, 4)Cluster:2 3.816093e-03
## factor2ind(Cluster, 4)Cluster:3 3.914435e-02
## IGHV.statusU                    4.285447e-02
## trisomy121                      7.076409e-01
## TP531                           9.790173e-08
C3vsC4_p_mv<-coxph_summ$coefficients[3,5] %>% round(3)
C3vsC4_p_mv
## [1] 0.039

Plot figure

##Run survfit on TTT
fit <- survfit(Surv(TTT, treatedAfter)~Cluster, clusters.surv)


t_plot<-  t2+theme(plot.title=element_blank(), axis.title.x=element_blank(), legend.key = element_blank())
t_table<- t2+theme(plot.title=element_blank(), panel.grid.major.x = element_blank())

Fig2_KM_surv <- 
  ggsurvplot(fit,
             surv.median.line = "hv", # Add medians survival
             legend.title = "Cluster",# Change legends: title & labels
             legend.labs = c("1", "2","3","4"),
             pval = FALSE,
             risk.table = TRUE,
             palette = c(colors[1], colors[2], colors[3], colors[4]),
             ggtheme = t_plot,
             tables.theme = t_table,
             legend = "bottom",
             xlab="Time in years",
             ylab="Time to next treatment (probability)",
             break.time.by=1, 
             xlim=c(0,6.8))

Fig2_KM_surv_plot <-
  Fig2_KM_surv$plot +
  annotate(geom="text", x=6.2, y=0.5,    label=C3vsC4_p, color="black", size=6) +
  geom_segment(x = 5.7, xend = 5.7, y = 0.72, yend = 0.28, color="black") +
  geom_segment(x = 5.6, xend = 5.7, y = 0.72, yend = 0.72, color="black") +
  geom_segment(x = 5.6, xend = 5.7, y = 0.281, yend = 0.281, color="black") +
  annotate(geom="text", x=6.8, y=0.23, label=C1vsC2_p, color="black", size=6) +
  geom_segment(x = 6.3, xend = 6.3, y = 0.33, yend = 0.13, color="black") +
  geom_segment(x = 6.2, xend = 6.3, y = 0.33, yend = 0.33, color="black") +
  geom_segment(x = 6.2, xend = 6.3, y = 0.131, yend = 0.131, color="black")


Fig2C <- wrap_elements(Fig2_KM_surv_plot + Fig2_KM_surv$table + plot_layout(ncol=1, heights = c(0.8,0.2)))

Fig2C

12.4 Figure 2D

Genetic predictors of cluster assignment.
Here we use a multinomial linear model with L1-penalty, implemented in the cvglmnet package. As the dependent variable, the cluster assignment for each patient was used. As input to the model, genetic features with more than 20% missing values were excluded, and only patients with complete annotation are included in the model (n=137). As predictors, the genetic mutations and CNVs (p=39) and IGHV status (coded as 0-1) are used,  using 3-fold cross-validation. Misclassification error is used as loss for cross- validation.
Prepare feature matrix

#Generate Matrix
#select features from patient meta file
geneMatrix <- 
  dplyr::select(patMeta,
                -c(gender:treatment)) %>%
  
  #adjust IGHV.status levels  U and M to numeric 1 and 0 
  mutate(IGHV = ifelse(is.na(IGHV.status), NA,
                       ifelse(IGHV.status == "M", 1, 0)), 
         #remove old columns and remove Methylation Cluster
         IGHV.status=NULL, Methylation_Cluster=NULL ) %>%
  
  #convert factors to numeric
  mutate_if(is.factor, as.character) %>%
  mutate_at(vars(-PatientID), as.numeric) %>%

  #convert to matrix format, with patient IDs as rownames
  data.frame() %>% 
  column_to_rownames("PatientID") %>% 
  as.matrix()


#Tidy matrix for use in glmnet function

#Remove genes with higher than 20% missing values
geneMatrix <- geneMatrix[,colSums(is.na(geneMatrix))/nrow(geneMatrix) <= 0.2]

#Filter for patients with complete data
geneMatrix.complete <- geneMatrix[complete.cases(geneMatrix),]


#Combine KRAS, NRAS and BRAF mutations into a single column
#set up empty matrix
Ras_Raf <- matrix(NA, 
                  nrow = nrow(geneMatrix.complete), 
                  ncol = 1)

colnames(Ras_Raf) <- "RAS/RAF"

#add RAS/RAF column to matrix
geneMatrix.complete <- cbind(geneMatrix.complete, Ras_Raf)

#Annotate RAS_RAF where where any of KRAS, NRAS or BRAF are mutated
geneMatrix.complete[,"RAS/RAF"] <- ifelse(geneMatrix.complete[,"KRAS"]==1,1,
                                                ifelse(geneMatrix.complete[,"BRAF"]==1,1,
                                                  ifelse(geneMatrix.complete[,"NRAS"]==1, 1, 0)))


#remove KRAS, NRAS and BRAF columns
geneMatrix.complete <- 
  geneMatrix.complete[, colnames(geneMatrix.complete) != "KRAS"]

geneMatrix.complete <- 
  geneMatrix.complete[, colnames(geneMatrix.complete) != "BRAF"]

geneMatrix.complete <- 
  geneMatrix.complete[, colnames(geneMatrix.complete) != "NRAS"]

Prepare response matrix

#get Clusters and Patients
y <- 
  dplyr::select(patMeta_cl, PatientID, Cluster) %>% 
  column_to_rownames("PatientID")

#Cluster column as.numeric
y$Cluster <- as.numeric(as.character(y$Cluster))

#transform matrix
y <- t(y)

#Match response matrix and feature matrix
y <- y[,rownames(geneMatrix.complete)]

Run multinomial regression

X <- geneMatrix.complete
y <- y

res <- runGlm.multi(X, y, method = "lasso", repeats = 50, folds = 3)

Prepare table of coefficients for plot

coefList <- res$coefMat

#Filter for features whose coefficients meet the frequency and minimum value thresholds
coefTab <- lapply(names(coefList), function(d) {
   coefMat <- coefList[[d]]
   coefTab <- sumCoef(coefMat=coefMat, freqCut = 0.6, coefCut = 0.35) %>%
     mutate(cluster = d)
}) %>% bind_rows()

Plot Figure 2D:
Coefficients (feature importance) for each cluster

#average remaining coefficients in dataframe for plotting
#ie show all sig features, the cluster they predict, and the average coefficient 
plotTab <-
  coefTab %>%
  dplyr::group_by(feature, cluster) %>% 
  dplyr::summarise(meanCoef = mean(coef),
                   sdCoef = sd(coef)) %>%
  dplyr::arrange(desc(meanCoef)) %>% 
  ungroup()
## `summarise()` has grouped output by 'feature'. You can override using the `.groups` argument.
#update labelling 
plotTab <- plotTab %>% mutate(feature = ifelse(feature == "IGHV", "IGHV status", 
                                   ifelse(feature == "trisomy12", "trisomy 12", 
                                      ifelse(feature == "gain8q", "gain(8q)", feature))))

plotTab$feature <- factor(plotTab$feature,
                          levels=c("IGHV status", 
                                   "trisomy 12",
                                   "SF3B1", 
                                   "POT1",
                                   "ATM", 
                                   "TP53",
                                   "RAS/RAF",
                                   "gain(8q)"))


Cluster.labs <- c("Cluster 1", "Cluster 2", "Cluster 3", "Cluster 4")
names(Cluster.labs) <- c(1, 2, 3, 4)
 

Fig2D <- 
    ggplot(plotTab, aes(x=feature, y=meanCoef))+ 
    geom_bar(stat= "identity", aes(fill = cluster))+
    geom_errorbar(aes(ymin = meanCoef - sdCoef, 
                      ymax = meanCoef + sdCoef))+
    ggtitle("") +
    coord_cartesian(ylim=c(-1.5, 1.75))+
    geom_hline(yintercept=0)+
    ylab("Depleted / Enriched\n")+
    xlab("")+
    scale_fill_manual(values=colors[1:4])+
    t1+
   t.leg+

   theme(strip.text = element_text(size = fontsize),
         axis.text.x= element_text(size=fontsize+4, angle=45, vjust=1))+
    facet_wrap(~cluster, 
               nrow=4, 
               strip.position = "right", 
               labeller = labeller(cluster = Cluster.labs))+
   guides(fill = "none")

Fig2D

12.5 Figure 2E

GSEA of differentially expressed genes between clusters.
Here, for the n=49 patient samples for which we have viability data and RNA–Seq data for matching samples, we search for associations of these two data types. Using DESeq2 we regress RNA-Seq count data on the patient clusters C3, C4 (design formula ~ IGHV.status + Cluster). Genes are ranked by their test statistics and Gene Set Enrichment Analysis (GSEA) implementing the fgsea algorithm with the clusterProfiler package is applied to the ranked lists with the Hallmark pathway gene sets from the MSigDB database.

Prepare RNA count data

cytSeq <- dds_smp

#Filter out IGHV genes
cytSeq <- cytSeq[!grepl("IG_", rowData(cytSeq)$biotype),] 

#split into 3,4
cytSeq.34 <- cytSeq[,colData(cytSeq)[,"Cluster"] %in% c(3,4)]

#remove patients where IGHV is unknown
cytSeq.34 <- cytSeq.34[,colData(cytSeq.34)[,"IGHV.status"] %in% c("U","M")]

#set order of factors
cytSeq.34$Cluster <- factor(cytSeq.34$Cluster, 
                            levels = c("3","4"))

cytSeq.34$IGHV.status <- factor(cytSeq.34$IGHV.status, 
                                levels = c("U","M"))

design(cytSeq.34) <- ~ IGHV.status + Cluster

Run DESeq

cytSeq.34 <- DESeq(cytSeq.34)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 688 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing

Extract results for 3 versus 4

res.ds <- 
  results(cytSeq.34, contrast = c("Cluster", 3, 4), tidy = TRUE) %>%
  dplyr::rename(Symbol = "row") %>% 
  dplyr::arrange(pvalue) 

Add Entrez IDs and readable gene names to results table

#get ensembl ids to Entrez dataframe
ens2entrez <- 
  rowData(cytSeq.34)[c("entrezgene", "symbol")] %>% 
  as.data.frame() %>% 
  rownames_to_column("ENSEMBL") %>%  
  tibble::as_tibble()


#Join 
res.ds <- left_join(res.ds, ens2entrez, by=c("Symbol"="ENSEMBL"))

Get ranks dataframes to feed to fgsea

d <- 
  dplyr::select(res.ds, entrezgene, stat) %>%
  na.omit() %>% 
  dplyr::group_by(entrezgene) %>% 
  dplyr::summarize(stat=mean(stat)) 

## feature 1: numeric vector
geneList <- d$stat

## feature 2: named vector
names(geneList) <- as.character(d$entrezgene)

## feature 3: decreasing order
geneList <- sort(geneList, decreasing = TRUE)
hm2gene <- 
  msigdbr(species = "Homo sapiens", category = "H") %>% 
  dplyr::select(gs_name, entrez_gene)
head(hm2gene)
## # A tibble: 6 x 2
##   gs_name               entrez_gene
##   <chr>                       <int>
## 1 HALLMARK_ADIPOGENESIS          19
## 2 HALLMARK_ADIPOGENESIS       11194
## 3 HALLMARK_ADIPOGENESIS       10449
## 4 HALLMARK_ADIPOGENESIS          33
## 5 HALLMARK_ADIPOGENESIS          34
## 6 HALLMARK_ADIPOGENESIS          35
#tidy up terms
hm2gene$gs_name <- gsub("HALLMARK_", "",hm2gene$gs_name)
hm2gene$gs_name <- gsub("_", " ",hm2gene$gs_name)
hm2gene$gs_name <-gsub("MTORC1 SIGNALING"   ,"MTORC1 Signaling",hm2gene$gs_name)
hm2gene$gs_name <-gsub("MYC TARGETS V1",    "MYC Targets V1"    ,hm2gene$gs_name)
hm2gene$gs_name <-gsub("OXIDATIVE PHOSPHORYLATION", "Oxidative Phosphorylation",hm2gene$gs_name)
hm2gene$gs_name <-gsub("TNFA SIGNALING VIA NFKB",   "TNFa Signalling via NFKB"  ,hm2gene$gs_name)
hm2gene$gs_name <-gsub("UNFOLDED PROTEIN RESPONSE", "Unfolded Protein Response" ,hm2gene$gs_name)   
hm2gene$gs_name <-gsub("UV RESPONSE UP",    "UV Response Up",hm2gene$gs_name)
hm2gene$gs_name <-gsub("INTERFERON GAMMA RESPONSE", "Interferon Gamma Response" ,hm2gene$gs_name)
hm2gene$gs_name <-gsub("G2M CHECKPOINT",    "G2M Checkpoint",hm2gene$gs_name)   
hm2gene$gs_name <-gsub("E2F TARGETS", "E2F Targets" ,hm2gene$gs_name)
hm2gene$gs_name <-gsub("P53 PATHWAY",   "P53 Pathway",hm2gene$gs_name)
#run GSEA
set.seed(1996)
gsea.res <- GSEA(geneList, TERM2GENE = hm2gene, by = "fgsea", seed = TRUE)

#make readable with gene names
gsea.res <- setReadable(gsea.res, org.Hs.eg.db, keyType = "ENTREZID")

Show top 10 pathways

#get dataframe of results
gsea.df <- fortify(gsea.res, 
              showCategory = 10, #how many levels to show
              split=NULL)

#order by NES
gsea.df <- dplyr::mutate(gsea.df, "NES" = eval(parse(text="NES")))
idx <- order(gsea.df[["NES"]], decreasing = TRUE)
gsea.df$Description <- factor(gsea.df$Description, levels=rev(unique(gsea.df$Description[idx])))
    
  
gsea.df$pos <- 0

  Fig2E <-
   ggplot(gsea.df,
         aes_string(x="NES", 
                    y="Description", 
                    fill="p.adjust")) +
    geom_bar(stat = "identity") +
    scale_fill_continuous(low=palreds[7],
                         high=palreds[2], 
                         name = "Adjusted p value",
                         guide=guide_colorbar(reverse=TRUE)) +
    
  ylab("Hallmark Pathway\n\n") + 
  xlab("Normalised Enrichment Score") +
  ggtitle("Upregulation of gene sets in C3 versus C4") +  
  t2 + 
  scale_size(range=c(3, 8)) + 
  geom_text(aes(label=Description, x = pos), nudge_x = 0.1, hjust = 0,  size = 6, color = "white") +
  theme(legend.position=c(0.9, 0.2),
        legend.background = element_blank(),
        legend.box.background = element_rect(size = 0.5),
        legend.title = element_text(face='bold',
                                    hjust = 1, size=11),
        legend.key = element_blank(),
        legend.text = element_text(size=12),
        axis.text.y = element_blank(), 
        axis.ticks.y = element_blank()) 
  

Fig2E

13 Arrange plots

design1<-"
11
23
45
"

tp<-ggplot2::theme(plot.tag=element_text(size = 30))


Figure2 <-
  wrap_elements(arrangeGrob(Fig2A[[4]])) +tp+
  wrap_elements(Fig2B) + tp+
  Fig2C + tp+
  wrap_elements(Fig2D) + tp+
  wrap_elements(Fig2E) + tp+
  
  plot_layout(design=design1, 
              heights = c(1.6, 1.2, 1.2) , 
              widths = c(1,1)) +
  
  plot_annotation(tag_levels = "A")

Figure2

14 Appendix

Sys.info()
##                                                                                             sysname 
##                                                                                            "Darwin" 
##                                                                                             release 
##                                                                                            "19.6.0" 
##                                                                                             version 
## "Darwin Kernel Version 19.6.0: Thu May  6 00:48:39 PDT 2021; root:xnu-6153.141.33~1/RELEASE_X86_64" 
##                                                                                            nodename 
##                                               "mac-huber20.academic.christs.cam.ac.uk.pool.embl.de" 
##                                                                                             machine 
##                                                                                            "x86_64" 
##                                                                                               login 
##                                                                                             "holly" 
##                                                                                                user 
##                                                                                             "holly" 
##                                                                                      effective_user 
##                                                                                             "holly"
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
##  [1] grid      parallel  stats4    stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] forcats_0.5.1                          
##  [2] stringr_1.4.0                          
##  [3] dplyr_1.0.7                            
##  [4] purrr_0.3.4                            
##  [5] readr_1.4.0                            
##  [6] tibble_3.1.2                           
##  [7] tidyverse_1.3.1                        
##  [8] tidyr_1.1.3                            
##  [9] rlang_0.4.11                           
## [10] maxstat_0.7-25                         
## [11] data.table_1.14.0                      
## [12] png_0.1-7                              
## [13] formattable_0.2.1                      
## [14] broom_0.7.8                            
## [15] scales_1.1.1                           
## [16] gtable_0.3.0                           
## [17] genomation_1.24.0                      
## [18] ChIPseeker_1.28.3                      
## [19] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2
## [20] GenomicFeatures_1.44.0                 
## [21] msigdbr_7.4.1                          
## [22] org.Hs.eg.db_3.13.0                    
## [23] AnnotationDbi_1.54.1                   
## [24] clusterProfiler_4.0.0                  
## [25] ConsensusClusterPlus_1.56.0            
## [26] glmnet_4.1-2                           
## [27] Matrix_1.3-4                           
## [28] survminer_0.4.9                        
## [29] ggpubr_0.4.0                           
## [30] DESeq2_1.32.0                          
## [31] SummarizedExperiment_1.22.0            
## [32] Biobase_2.52.0                         
## [33] MatrixGenerics_1.4.0                   
## [34] matrixStats_0.59.0                     
## [35] GenomicRanges_1.44.0                   
## [36] GenomeInfoDb_1.28.1                    
## [37] IRanges_2.26.0                         
## [38] S4Vectors_0.30.0                       
## [39] BiocGenerics_0.38.0                    
## [40] ggfortify_0.4.11                       
## [41] pheatmap_1.0.12                        
## [42] magrittr_2.0.1                         
## [43] ggbeeswarm_0.6.0                       
## [44] ggrepel_0.9.1                          
## [45] RColorBrewer_1.1-2                     
## [46] cowplot_1.1.1                          
## [47] patchwork_1.1.1                        
## [48] magick_2.7.2                           
## [49] gridExtra_2.3                          
## [50] Hmisc_4.5-0                            
## [51] ggplot2_3.3.5                          
## [52] Formula_1.2-4                          
## [53] survival_3.2-11                        
## [54] lattice_0.20-44                        
## [55] plyr_1.8.6                             
## [56] BiocStyle_2.20.2                       
## 
## loaded via a namespace (and not attached):
##   [1] rappdirs_0.3.3           rtracklayer_1.52.0       exactRankTests_0.8-32   
##   [4] bit64_4.0.5              knitr_1.33               DelayedArray_0.18.0     
##   [7] rpart_4.1-15             KEGGREST_1.32.0          RCurl_1.98-1.3          
##  [10] generics_0.1.0           RSQLite_2.2.7            shadowtext_0.0.8        
##  [13] bit_4.0.4                enrichplot_1.12.2        lubridate_1.7.10        
##  [16] xml2_1.3.2               assertthat_0.2.1         viridis_0.6.1           
##  [19] xfun_0.24                hms_1.1.0                babelgene_21.4          
##  [22] evaluate_0.14            fansi_0.5.0              restfulr_0.0.13         
##  [25] progress_1.2.2           caTools_1.18.2           dbplyr_2.1.1            
##  [28] readxl_1.3.1             km.ci_0.5-2              igraph_1.2.6            
##  [31] DBI_1.1.1                geneplotter_1.70.0       htmlwidgets_1.5.3       
##  [34] ellipsis_0.3.2           backports_1.2.1          bookdown_0.22.3         
##  [37] markdown_1.1             annotate_1.70.0          gridBase_0.4-7          
##  [40] biomaRt_2.48.2           vctrs_0.3.8              abind_1.4-5             
##  [43] cachem_1.0.5             withr_2.4.2              ggforce_0.3.3           
##  [46] BSgenome_1.60.0          checkmate_2.0.0          GenomicAlignments_1.28.0
##  [49] treeio_1.16.1            prettyunits_1.1.1        cluster_2.1.2           
##  [52] DOSE_3.18.1              mltools_0.3.5            ape_5.5                 
##  [55] lazyeval_0.2.2           crayon_1.4.1             genefilter_1.74.0       
##  [58] labeling_0.4.2           pkgconfig_2.0.3          tweenr_1.0.2            
##  [61] nlme_3.1-152             vipor_0.4.5              nnet_7.3-16             
##  [64] lifecycle_1.0.0          downloader_0.4           filelock_1.0.2          
##  [67] BiocFileCache_2.0.0      modelr_0.1.8             seqPattern_1.24.0       
##  [70] cellranger_1.1.0         polyclip_1.10-0          aplot_0.0.6             
##  [73] KMsurv_0.1-5             carData_3.0-4            boot_1.3-28             
##  [76] zoo_1.8-9                reprex_2.0.0             base64enc_0.1-3         
##  [79] beeswarm_0.4.0           viridisLite_0.4.0        rjson_0.2.20            
##  [82] bitops_1.0-7             KernSmooth_2.23-20       Biostrings_2.60.1       
##  [85] blob_1.2.1               shape_1.4.6              pdftools_3.0.1          
##  [88] qvalue_2.24.0            qpdf_1.1                 jpeg_0.1-8.1            
##  [91] rstatix_0.7.0            ggsignif_0.6.2           memoise_2.0.0           
##  [94] gplots_3.1.1             zlibbioc_1.38.0          compiler_4.1.0          
##  [97] scatterpie_0.1.6         BiocIO_1.2.0             plotrix_3.8-1           
## [100] cli_3.0.0                Rsamtools_2.8.0          XVector_0.32.0          
## [103] htmlTable_2.2.1          MASS_7.3-54              tidyselect_1.1.1        
## [106] stringi_1.6.2            highr_0.9                yaml_2.2.1              
## [109] GOSemSim_2.18.0          askpass_1.1              locfit_1.5-9.4          
## [112] latticeExtra_0.6-29      survMisc_0.5.5           fastmatch_1.1-0         
## [115] tools_4.1.0              rio_0.5.27               rstudioapi_0.13         
## [118] foreach_1.5.1            foreign_0.8-81           farver_2.1.0            
## [121] ggraph_2.0.5             digest_0.6.27            rvcheck_0.1.8           
## [124] BiocManager_1.30.16      ggtext_0.1.1             gridtext_0.1.4          
## [127] Rcpp_1.0.6               car_3.0-11               httr_1.4.2              
## [130] colorspace_2.0-2         rvest_1.0.0              fs_1.5.0                
## [133] XML_3.99-0.6             splines_4.1.0            tidytree_0.3.4          
## [136] graphlayouts_0.7.1       xtable_1.8-4             jsonlite_1.7.2          
## [139] ggtree_3.0.2             tidygraph_1.2.0          R6_2.5.0                
## [142] pillar_1.6.1             htmltools_0.5.1.1        glue_1.4.2              
## [145] fastmap_1.1.0            BiocParallel_1.26.1      codetools_0.2-18        
## [148] fgsea_1.18.0             mvtnorm_1.1-2            utf8_1.2.1              
## [151] curl_4.3.2               gtools_3.9.2             zip_2.2.0               
## [154] GO.db_3.13.0             openxlsx_4.2.4           rmarkdown_2.9           
## [157] munsell_0.5.0            DO.db_2.9                GenomeInfoDbData_1.2.6  
## [160] iterators_1.0.13         impute_1.66.0            haven_2.4.1             
## [163] reshape2_1.4.4

In this sub-vignette we present the analysis and source code for Figure 3. This sub-vignette can be built along with all others by running CLLCytokineScreen2021.Rmd.

15 Set up

set.seed(1996)

Load libraries

library(clusterProfiler)
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
library(ChIPseeker)
library(genomation)
library(DESeq2)
library(ggbeeswarm)
library(ggpubr)
library(ggplot2)
library(magrittr)
library(gtable)
library(glmnet)
library(patchwork)
library(scales)
library(gridExtra)
library(ggrepel)
library(broom)
library(plyr)
library(dplyr)
library(tidyverse)

Set plot directory

plotDir = ifelse(exists(".standalone"), "", "../../inst/figs/")
if(plotDir!="") if(!file.exists(plotDir)) dir.create(plotDir)

16 Load data

Load raw files

#df : tibble containing all screening data
load( "../../data/df.RData")

#patMeta : tibble containing all patient genetic data
load( "../../data/patMeta.RData")
#From tsvs
#df: tibble containing all screening data
df <- read.table(file= "../../inst/extdata/df.txt",header = TRUE) %>% as_tibble()

#patMeta: tibble containing genetic data for patient samples in screen 
patMeta <- read.table(file= "../../inst/extdata/patMeta.txt",header = TRUE) %>% as_tibble()



#Arrange data for analysis
#screening data
df$Cytokine <- factor(df$Cytokine, levels= c("No Cytokine",
                                              "Resiquimod", 
                                              "IL-4", 
                                              "TGF-b1",
                                              "IL-1b",
                                              "Interferon gamma",
                                              "SDF-1a",
                                              "sCD40L" ,
                                              "sCD40L+IL-4",
                                              "soluble anti-IgM", 
                                              "CpG ODN",
                                              "IL-6",
                                              "IL-10",
                                              "IL-21",
                                              "HS-5 CM", 
                                              "IL-15",
                                              "BAFF",
                                              "IL-2" ))


#patMeta
patMeta %>%mutate_at(vars(-PatientID), as.factor)

Process files

#filter for stimuli - only treatments
df <- dplyr::filter(df, 
                    Drug == "DMSO", 
                    Cytokine != "No Cytokine")

patMeta$treatment <- as.factor(patMeta$treatment)

17 Define Aesthetics

source("../../R/themes_colors.R")

18 Define functions

runGlm: to run multi-variant regression. This function takes a feature matrix X and a continuous response matrix y, to run lasso or ridge regularised regression. The function will run the cv.glmnet function for the chosen number of repeats, applying cross-fold validation using chosen number of folds.

#select lasso or ridge 
runGlm <- function(X, y, method = "lasso", repeats=30, folds = 3) {
  
  #set up objects
  modelList <- list()
  lambdaList <- c()
  varExplain <- c()
  
  #set up a matrix for values of coefficients, with a row for each feature, and a column for each repeat
  coefMat <- matrix(NA, ncol(X), repeats)
  
  #make row names = genetic features
  rownames(coefMat) <- colnames(X)

  #set alpha according to selected method
  if (method == "lasso"){
    alpha = 1
  } else if (method == "ridge") {
    alpha = 0
  }
  
  #Run cv.glmnet for chosen number of repeats 
  for (i in seq(repeats)) {
    
    #if there are more than two features, fit a glm
    if (ncol(X) > 2) {
      
      res <- cv.glmnet(X,y, type.measure = "mse", family="gaussian", 
                       nfolds = folds, alpha = alpha, standardize = FALSE)
      
      #add lambda min from this repeat to the list of lambdas
      lambdaList <- c(lambdaList, res$lambda.min)
      
      #put the res object (with lambdas) into the list of models
      modelList[[i]] <- res
      
      #extract the coefficients for each feature, for lambda.min
      coefModel <- coef(res, s = "lambda.min")[-1] #remove intercept row
      
      #put these coefficients into  column of coefMatrix corresponding to the repeat
      coefMat[,i] <- coefModel
      
      #calculate variance explained
      y.pred <- predict(res, s = "lambda.min", newx = X)
      varExp <- cor(as.vector(y),as.vector(y.pred))^2
      varExplain[i] <- ifelse(is.na(varExp), 0, varExp) 
      
     
      
    } else {
      #if there are only two features, fit a linear model
      fitlm<-lm(y~., data.frame(X))
      varExp <- summary(fitlm)$r.squared
      varExplain <- c(varExplain, varExp)
      
    }

  }
  #gather all lists
  list(modelList = modelList, lambdaList = lambdaList, varExplain = varExplain, coefMat = coefMat)
}

makelegends: to make legends for heatmap plot. AcceptslegendFor, a vector of names of what the legend should show (I (IGHV status), M (Methylation Cluster), G (Gene Mutation)), and colors, a vector of colours corresponding to the elements of legendFor.

makelegends <- function (legendFor, colors) {

    x = NULL
    y = NULL
    colors = colors[names(colors) %in% legendFor]
    nleg = length(colors)
    
    #set up gtable
    wdths = c(0.4,2,2,2,1.5)
    hghts = c(2)
    
    gtl = gtable(widths=unit(wdths, "in"), heights=unit(hghts, "in"))
    n = 2
    
    #legend for Methylation Cluster
    if ("M" %in% names(colors)) {
        Mgg = ggplot(data = data.frame(x = 1, 
                                       y = factor(c("LP", "IP", "HP"), 
                                                  levels = c("LP", "IP", "HP"))), 
                     aes(x = x, y = y, fill = y)) + 
              geom_tile() + 
              scale_fill_manual(name = "Methylation cluster", 
                                values = setNames(colors[["M"]], 
                                                  nm = c("LP", "IP","HP"))) + 
              theme(legend.title = element_text(size = 12), 
                    legend.text = element_text(size = 12))
        
        gtl = gtable_add_grob(gtl, gtable_filter(ggplotGrob(Mgg), "guide-box"), 1, n)
        n = n + 1
    }
    
    #Legend for IGHV status
    if ("I" %in% names(colors)) {
        Igg = ggplot(data = data.frame(x = 1, y = factor(c("Unmutated", 
            "Mutated"), levels = c("Unmutated", "Mutated"))), 
            aes(x = x, y = y, fill = y)) + geom_tile() + scale_fill_manual(name = "IGHV", 
            values = setNames(colors[["I"]], nm = c("Unmutated", "Mutated"))) + 
            theme(legend.title = element_text(size = 12), 
                  legend.text = element_text(size = 12))
        gtl = gtable_add_grob(gtl, gtable_filter(ggplotGrob(Igg), 
            "guide-box"), 1, n)
        n = n + 1
    }
    
    #legend for gene mutations
    if ("G" %in% names(colors)) {
        Ggg = ggplot(data = data.frame(x = 1, y = factor(c("Wild Type", 
            "Mutated"), levels = c("Wild Type", "Mutated"))), 
            aes(x = x, y = y, fill = y)) + geom_tile() + scale_fill_manual(name = "Gene", 
            values = setNames(colors[["G"]], nm = c("Wild Type", "Mutated"))) + 
            theme(legend.title = element_text(size = 12), 
                  legend.text = element_text(size = 12))
        gtl = gtable_add_grob(gtl, gtable_filter(ggplotGrob(Ggg), 
            "guide-box"), 1, n)
        n = n + 1
    }
    
    return(list(plot = gtl, width = sum(wdths), height = sum(hghts)))
}

lassoPlot: to generate predictor profiles using output of runGlm function. Accepts lassoOut, a list of objects generated by runGlm, geneMatrix a feature matrix containing numeric data of mutation status for various genes, viabMatrix, a response matrix containing viability data for corresponding samples to those in geneMatrix, freqCut, the proportion of times a coefficient should be selected to be displayed in the plot, and coefCut, the minimum size of coefficients to display in the plot.

lassoPlot <- function(lassoOut, geneMatrix, viabMatrix, freqCut = 1, coefCut = 0.01) {
  
  plotList <- list()
  
  for (seaName in names(lassoOut)) { 
    ###FOR THE BAR PLOT
    #extract coefMat for each stimulus
    barValue <- rowMeans(lassoOut[[seaName]]$coefMat)
    #extract proportion of repeats for which each coefficient is significant
    freqValue <- rowMeans(abs(sign(lassoOut[[seaName]]$coefMat)))
    #filter out coefficients that don't meet freqCut and coefCut thresholds
    barValue <- barValue[abs(barValue) >= coefCut & freqValue >= freqCut] 
    #arrange the bar values in numerical order
    barValue <- barValue[order(barValue)]
    #if there are no sig coefficients, don't plot
    if(length(barValue) == 0) {
      plotList[[seaName]] <- NA
      next
    }

    ###FOR THE HEATMAP AND SCATTER PLOT
    #get feature matrix and response matrix to plot
    allData <- geneMatrix
    viabValue <- unlist(viabMatrix[seaName,])
    
    #get feature matrix for sig coefficients only
    tabValue <- allData[, names(barValue),drop=FALSE]
    ord <- order(viabValue)
    viabValue <- viabValue[ord]
    tabValue <- tabValue[ord, ,drop=FALSE]
    sampleIDs <- rownames(tabValue)
    tabValue <- as_tibble(tabValue)
    tabValue$Sample <- sampleIDs
    
    
    #annotate mutations by mutation, methylation or IGHV
    matValue <- gather(tabValue, key = "Var",value = "Value", -Sample)
    matValue$Type <- "mut"
    
    #for Methylation Cluster
    matValue$Type[grep("Methylation",matValue$Var)] <- "meth"
    
    #for IGHV status
    matValue$Type[grep("IGHV",matValue$Var)] <- "ighv"
    
    #change the scale of the value so that IGHV, Methylation and Mutation do not overlap
    matValue[matValue$Type == "mut",]$Value = matValue[matValue$Type == "mut",]$Value + 10
    matValue[matValue$Type == "meth",]$Value = matValue[matValue$Type == "meth",]$Value + 20
    matValue[matValue$Type == "ighv",]$Value = matValue[matValue$Type == "ighv",]$Value + 30
    
    #change continuous to categorical
    matValue$Value <- factor(matValue$Value,levels = sort(unique(matValue$Value)))
    
    #arrange order of heatmap
    matValue$Var <- factor(matValue$Var, levels = names(barValue))
    matValue$Sample <- factor(matValue$Sample, levels = names(viabValue))
    
    #change labels if mutation is a Doehner mutation 
    matValue$Var <- revalue(matValue$Var, c("del11q" = "del(11q)", "del13q" = "del(13q)", "del17p" = "del(17p)", "trisomy12" = "trisomy 12")) 
    

   
     #plot the heatmap 
      #title
    if(seaName=="TGF-b1"){
      thetitle <- "TGF-\u03B21"     
      } else {
        if(seaName=='IL-4'){ 
          thetitle <- "IL4"  
          } else {
            thetitle <- seaName
          }
        }
    
      p1 <- ggplot(matValue, aes(x=Sample, y=Var)) + 
            geom_tile(aes(fill=Value), color = "white") + #ghost white
            theme_bw()+
            scale_y_discrete(expand=c(0,0)) + 
            theme(axis.title.y = element_text( size=14),
                  axis.title.x = element_text( size=14),
                  axis.text.x=element_text(hjust=0, size=11),
                  axis.text.y=element_text(hjust=0.1, size=11),
                  axis.ticks=element_blank(),
                  panel.border=element_rect(colour="gainsboro"),  
                  plot.title=element_text(face="bold", size = 18, margin = margin(t = -5, b = 1)), 
                  panel.background=element_blank(),
                  panel.grid.major=element_blank(), 
                  panel.grid.minor=element_blank()) + 
            xlab("Mutation status for each patient") + 
            ylab("") + 
            scale_fill_manual(name="Mutated", 
                              values=c(`10`= offwhite,  #WT
                                       `11`="#373A36", #Mutant
                                       `20`= offwhite, #LP
                                       `20.5`= "#707372", #IP
                                       `21` = "#A8A99E", #HP
                                       `30` = offwhite, #IGHV-U
                                       `31` = "#707372"), #IGHV-M
                                       guide="none") + 
            ggtitle(thetitle)
        

         
    #Plot the bar plot on the left of the heatmap 
    barDF = data.frame(barValue, nm=factor(names(barValue),levels=names(barValue)))
    
    p2 <- ggplot(data=barDF, aes(x=nm, y=barValue)) + 
      geom_bar(stat="identity", 
               fill=ifelse(barValue<0,
                           palblues[6],palreds[8]), 
               colour="black", 
               size=0.3) + 
      scale_x_discrete(expand=c(0,0.5)) + 
      scale_y_continuous(expand=c(0,0)) + 
      coord_flip(ylim=c(-0.3,0.35)) + #changed from min(barValue) and max(barValue)
      theme(panel.grid.major=element_blank(), 
            panel.background=element_blank(), 
            axis.ticks.y = element_blank(),
            panel.grid.minor = element_blank(), 
            axis.text=element_text(size=11, angle = 45, hjust = 1, vjust = 1),
                axis.title = element_text(size=14), 
            panel.border=element_blank()) +
      ylab("Size of predictor") + 
      geom_vline(xintercept=c(0.5), 
                 color="black", 
                 size=0.6)
    
    #Plot the scatter plot under the heatmap
    scatterDF = data.frame(X=factor(names(viabValue), 
                                    levels=names(viabValue)), 
                           Y=unlist(viabValue))
    
    p3 <- 
      ggplot(scatterDF, aes(x=X, y=Y)) + 
      geom_point(shape=21, 
                     fill="dimgrey", 
                     colour="#707372", #dark grey
                     size=1.2) + 
      theme_bw() +
      theme(panel.grid.minor=element_blank(), 
                panel.grid.major.x=element_blank(), 
                #axis.title.x=element_blank(), 
                axis.ticks.x=element_blank(), 
                axis.text.y=element_text(size=11), 
                axis.title.x=element_text(size=14), 
                panel.border=element_rect(colour="dimgrey", size=0.1),
                panel.background=element_rect(fill="white")) +
      xlab("Log(Viability) for each sample")
    
    
    #Assemble all the plots togehter

    # construct the gtable
    wdths = c(0.2, 1.5, 0.2, 1.3*ncol(matValue), 1.5, 0.2)
    hghts = c(0.2, 0.2, 0.0020*nrow(matValue), 0.3, 1, 0.2)
    gt = gtable(widths=unit(wdths, "in"), heights=unit(hghts, "in"))
    
    ## make grobs
    gg1 = ggplotGrob(p1)
    gg2 = ggplotGrob(p2)
    gg3 = ggplotGrob(p3)

    ## fill in the gtable
   
    #HEATMAP
    #5:1 = "PREDICTORS"
    gt = gtable_add_grob(gt, gtable_filter(gg1, "panel"), 3, 4) # add heatmap
    gt = gtable_add_grob(gt, gtable_filter(gg1, "panel"), 3, 4) #add legend
    gt = gtable_add_grob(gt, gtable_filter(gg1, "title"), 1, 4) #add title to plot
    gt = gtable_add_grob(gt, gtable_filter(gg1, "axis-l"), 3, 5) # variable names
    gt = gtable_add_grob(gt, gtable_filter(gg1, "xlab-b"), 2, 4) # axis title
    
    #BARPLOT
    gt = gtable_add_grob(gt, gtable_filter(gg2, "panel"), 3, 2) # add barplot
    gt = gtable_add_grob(gt, gtable_filter(gg2, "axis-b"), 4, 2) # y axis for barplot
    gt = gtable_add_grob(gt, gtable_filter(gg2, "xlab-b"), 2, 2) # y lab for barplot

    
    #SCATTER PLOT
    gt = gtable_add_grob(gt, gtable_filter(gg3, "panel"), 5, 4) # add scatterplot
    gt = gtable_add_grob(gt, gtable_filter(gg3, "xlab-b"), 6, 4) # x label for scatter plot
    gt = gtable_add_grob(gt, gtable_filter(gg3, "axis-l"), 5, 3) #  axis for scatter plot
    
   

    
    #plot
    #grid.draw(gt)
    plotList[[seaName]] <- gt
  }
  return(plotList)
}

19 Plot Figures

19.1 Figure 3A

Overview of t test p values to identify genetic features that affect responses to stimuli.
Here we test for associations between stimulus viability assay results and genomic features by Student’s t-tests (two-sided, with equal variance). We test somatic mutations (aggregated at the gene level), copy number aberrations and IGHV status. We restrict the analysis to features that are present in at least 3 patient samples (63 features). p-values are adjusted for multiple testing by applying the Benjamini-Hochberg procedure.

Run t tests and extract p values

##################### List of mutations with >2 postive cases #############################
selected_mutations <- 
  patMeta %>% 
  mutate(Ras_Raf=as.factor(ifelse(BRAF==1|KRAS==1|NRAS==1, 1,0))) %>% 
  dplyr::select(-BRAF, -KRAS, -NRAS) %>% 
  dplyr::select( -gender, 
                 -diagnosis, 
                 ) %>%
  pivot_longer(-PatientID, 
               names_to="Genetic_Alteration", 
               values_to = "alteration_value") %>% 
  dplyr::filter(alteration_value %in% c(1, "U")) %>% 
  dplyr::group_by(Genetic_Alteration) %>% 
  dplyr::count() %>% 
  dplyr::filter(n>2) %>% 
  dplyr::select(Genetic_Alteration) %>% 
  unlist()


##################### t tests and p-value adjustment ###############################  
p_values <-
  ## Select columns from screening data 
  dplyr::select(df, PatientID, Log, Cytokine) %>% 
  
  ## Join Screening data with metadata
  left_join(patMeta, by = "PatientID") %>%
  
  ## Add Ras/Raf column, remove single columns
  mutate(Ras_Raf=as.factor(ifelse(BRAF==1|KRAS==1|NRAS==1, 1,0))) %>% 
  dplyr::select(-BRAF, -KRAS, -NRAS) %>% 

## remove unused columns from metadata
  dplyr::select( -gender, 
                 -diagnosis, 
                 -treatment) %>% 
  
  ## transform data to long format
  pivot_longer(cols=c(-PatientID, -Log, -Cytokine), 
               names_to = "Genetic_alt", 
               values_to="alt_value") %>% 
  
  ## filter to selected mutations (see above)
  dplyr::filter(Genetic_alt%in%selected_mutations) %>% 
  
  
  ## group by Cytokine and Genetic alteration  
  dplyr::group_by(Cytokine, Genetic_alt) %>% 
  
  ## Perform t.test on every combination of Cytokine and genetic alteration
  do(tidy(t.test(Log ~ alt_value, data = ., var.equal = T))) %>% 
  
  ## ungroup before adjusting p-value
  ungroup() %>% 
  
  ## adjust p-values to multiple testing using BH method 
  mutate(adj.p.value=p.adjust(p.value, method = "BH"))
  

################################## Order of cytokines by descending significance ############################
Cytokine_order <-
  p_values %>% 
  dplyr::group_by(Cytokine) %>% 
  dplyr::arrange(adj.p.value) %>% 
  dplyr::filter(row_number()==1) %>% 
  ungroup() %>% 
  dplyr::arrange(adj.p.value) %>% 
  dplyr::select(Cytokine) %>% 
  unlist()

Genetic_alt_order <-
  p_values %>% 
  dplyr::group_by(Genetic_alt) %>% 
  dplyr::arrange(adj.p.value) %>% 
  dplyr::filter(row_number()==1) %>% 
  ungroup() %>% 
  dplyr::arrange(adj.p.value) %>% 
  dplyr::select(Genetic_alt) %>% 
  unlist()

############################# Define FDR cutoff #################################
fdr = 0.1
  

############################################ Plot ########################################
p_values %<>% 
  mutate(Cytokine=factor(Cytokine, levels = Cytokine_order)) %>% 
  mutate(Genetic_alt=factor(Genetic_alt, levels = Genetic_alt_order))
  
Fig3A <- 
  ggplot(dplyr::filter(p_values, adj.p.value>fdr), 
         aes(x = Cytokine, y = -log10(adj.p.value))) +
  geom_point(color = "lightgrey", size=3) +
  geom_beeswarm(data = dplyr::filter(p_values, adj.p.value<=fdr), 
                aes( color=Genetic_alt), size=5, cex=1.7) +
  ##FDR line  
  geom_hline(yintercept = -log10(fdr),linetype="dashed", size=0.3)+
  
  ##Main Theme
  t1 +
  
  theme(axis.text.x = element_text(angle=45, 
                                   size=18, 
                                   face="bold",
                                   hjust=1, 
                                   vjust=1),
        axis.title.x=element_blank()) +
  ##Legend Theme
  theme(legend.position="bottom", 
        legend.title = element_text(face='bold', hjust = 0, size=18), 
        legend.key = element_blank(),  
        legend.text = element_text(size=18)) +
  guides(colour = guide_legend(nrow=3, title = "Mutations"), 
         shape = guide_legend(ncol = 1)) +
  scale_color_manual(name = "Mutations", values = colors, labels=c("IGHV.status"="IGHV status", "del9p"="del(9p)", "trisomy12"="trisomy 12", "gain17q"="gain(17q)", "gain2p"="gain(2p)","gain19p"="gain(19p)","gain19q"="gain(19q)", "del7q"="del(7q)","del9q"="del(9q)", "del4p"= "del(4p)", "SPEN"="SPEN", "del11q"="del(11q)", "del1q" = "del(1q)")) +
  scale_y_continuous(expression("BH-adjusted  "* italic(p)*"-value"), 
                     breaks=seq(0,10,5),
                     labels=math_format(expr=10^.x)(-seq(0,10,5)))+
  scale_x_discrete(labels=c("TGF-b1"="TGF-\u03B21", "sCD40L+IL-4"="sCD40L + IL4", "IL-1b"="IL1\u03B2", "IL-4"="IL4", "IL-6"="IL6","IL-15"="IL15","IL-10"="IL10", "IL-21"="IL21","IL-2"="IL2", "Interferon gamma"= "Interferon \u03B3", "SDF-1a"="SDF-1\u03B1"))

Fig3A

Print significant p-values for text reference.

pvals <-
  p_values %>%
  dplyr::rename(cyt=Cytokine, mutation=Genetic_alt)

p_values %>% 
  dplyr::filter(adj.p.value<0.1) %>% 
  dplyr::select(Cytokine, Genetic_alt, adj.p.value) %>% 
  dplyr::arrange(adj.p.value) %>% 
  DT::datatable()

19.2 Figure 3B

Lasso plots to represent genetic predictors of response to stimuli, calculated by lasso-regularised regression
Here we use a Gaussian linear model with L1-penalty. As the dependent variable, the normalised viability value is used for all stimuli. The genetic feature matrix consists of mutations and CNVs (p=39), IGHV status (encoded as M = 1 and U = 0) and Methylation Cluster(encoded as 0, 0.5, 1). As a measure of explained variance the reduction in cross-validated mean squared error relative to the null model is calculated.
Set up genetic feature matrix

#Generate Matrix

#select features from patient meta file
geneMatrix <- 
  dplyr::select(patMeta,
                -c(gender:treatment)) %>%
  
  #adjust IGHV.status levels  U and M to numeric 1 and 0 
  mutate(IGHV = ifelse(is.na(IGHV.status), NA,
                       ifelse(IGHV.status == "M", 1, 0)), 
         #adjust Methylation_Cluster levels  LP, IP, HP to 0, 0.5, 1
         Methylation = ifelse(is.na(Methylation_Cluster), NA,
                              ifelse(Methylation_Cluster == "LP", 0,
                                     ifelse(Methylation_Cluster == "IP", 0.5, 1))),
         #remove old columns
         IGHV.status=NULL, Methylation_Cluster=NULL ) %>%
  
  #rename columns for labelling

  #rename("del(11q)" = del11q) %>%
  #rename("del(13q)" = del13q) %>%
  #rename("del(17p)" = del17p) %>%
  #rename("trisomy 12" = trisomy12) %>%
  
  #convert factors to numeric
  mutate_if(is.factor, as.character) %>%
  mutate_at(vars(-PatientID), as.numeric) %>%
  
  #convert to matrix format, with patient IDs as rownames
  data.frame() %>% 
  column_to_rownames("PatientID") %>% 
  as.matrix()


#Tidy matrix for use in glmnet function

#Remove genes with higher than 20% missing values
geneMatrix <- geneMatrix[,colSums(is.na(geneMatrix))/nrow(geneMatrix) <= 0.2]

#Filter for patients with complete data
geneMatrix.complete <- geneMatrix[complete.cases(geneMatrix),]


#Combine KRAS, NRAS and BRAF mutations into a single column
#set up empty matrix
Ras_Raf <- matrix(NA, 
                  nrow = nrow(geneMatrix.complete), 
                  ncol = 1)

colnames(Ras_Raf) <- "RAS/RAF"

#add RAS/RAF column to matrix
geneMatrix.complete <- cbind(geneMatrix.complete, Ras_Raf)

#Annotate RAS_RAF where where any of KRAS, NRAS or BRAF are mutated
geneMatrix.complete[,"RAS/RAF"] <- ifelse(geneMatrix.complete[,"KRAS"]==1,1,
                                                ifelse(geneMatrix.complete[,"BRAF"]==1,1,
                                                  ifelse(geneMatrix.complete[,"NRAS"]==1, 1, 0)))


#remove KRAS, NRAS and BRAF columns
geneMatrix.complete <- 
  geneMatrix.complete[, colnames(geneMatrix.complete) != "KRAS"]

geneMatrix.complete <- 
  geneMatrix.complete[, colnames(geneMatrix.complete) != "BRAF"]

geneMatrix.complete <- 
  geneMatrix.complete[, colnames(geneMatrix.complete) != "NRAS"]

Set up viability response matrix

viabMatrix <- 
  #select patients with complete meta data 
  dplyr::filter(df,
                PatientID %in% row.names(geneMatrix.complete)) %>%
  #select cytokine, patient and log(viability values only)
  dplyr::select(Cytokine, 
                Log, 
                PatientID) %>% 
  #reshape data
  spread(key = PatientID, value = Log) %>% 
  data.frame() %>% 
  #make Cytokine the row names
  remove_rownames() %>%
  column_to_rownames("Cytokine")

#make sample order same as in geneMatrix
viabMatrix <- viabMatrix[,rownames(geneMatrix.complete)]

Run lasso regression

#set object to hold model outputs
dataResult <- list()

#fit model for each stimulus
for (i in rownames(viabMatrix)){  
  
    #prepare input and response matrices
    y <- unlist(viabMatrix[i,]) # viability for each patient with given condition
    X <- geneMatrix.complete #genetic features for each patient

    #fit the model
    cvglmfit <- runGlm(X, y, method="lasso", repeats=30, folds=3)
    
    #collect the results for each stimulus in one object
    dataResult[[i]] <- cvglmfit

}

Get predictor profiles for stimuli of interest

heatMaps_cyt <- lassoPlot(dataResult[c("IL-4","CpG ODN", "TGF-b1")] , 
                          geneMatrix.complete, #use gene matrix for heatmap 
                          viabMatrix, #use viab matrix for scatter plot
                          freqCut = 0.75, #coefficients should be selected in <75% of bootstrapped model files
                          coefCut = 0.00) #no minimum value for coefficients 

Assemble legend for heatmaps

#G = gene mutations, I = IGHV, M = Methylation Cluster
legendFor = c("G", "I", "M")

#assign colours for I, G and M
coldef<-list()
coldef["I"] <- list(c(offwhite,"#707372")) #U-CLL and M-CLL
coldef["M"] <- list(c(offwhite, "#707372", "#A8A99E")) #IP, HP, LP
coldef["G"] <- list(c(offwhite, "#373A36")) #WT and Mutated


legends = makelegends(legendFor=c("G","I","M"),coldef)

Assemble figure

Fig3B_1 <- wrap_elements(grid.arrange( grobs = heatMaps_cyt[1], ncol =1))

Fig3B_2 <- wrap_elements(grid.arrange( grobs = heatMaps_cyt[2], ncol =1))

Fig3B_3 <- wrap_elements(grid.arrange( grobs = heatMaps_cyt[3], ncol =1))

Fig3B_leg <- legends[["plot"]] %>% wrap_elements()

20 Assemble Figure

design1<-"
  AB
"

blankplot <- 
  ggplot() +
  geom_blank() +
  theme(panel.background = element_blank())

tp <- theme(plot.tag=element_text(size = 30))

Fig3 <-
  wrap_elements(Fig3A) + tp +
  
   wrap_elements(Fig3B_1 +
                Fig3B_2+
                Fig3B_3+
                wrap_elements(blankplot + # add a blank space before legend to align with plots above
                                Fig3B_leg + 
                                plot_layout(ncol=2, widths  = c(0.15,0.9))) +
                  plot_layout(ncol=1, 
                              heights = c(1.7,1.7,1.7,0.5)))+tp +
                
    
  plot_annotation(tag_levels = "A")+
  plot_layout(design = design1, width=c(2,1.7))

Fig3

21 Appendix

Sys.info()
##                                                                                             sysname 
##                                                                                            "Darwin" 
##                                                                                             release 
##                                                                                            "19.6.0" 
##                                                                                             version 
## "Darwin Kernel Version 19.6.0: Thu May  6 00:48:39 PDT 2021; root:xnu-6153.141.33~1/RELEASE_X86_64" 
##                                                                                            nodename 
##                                               "mac-huber20.academic.christs.cam.ac.uk.pool.embl.de" 
##                                                                                             machine 
##                                                                                            "x86_64" 
##                                                                                               login 
##                                                                                             "holly" 
##                                                                                                user 
##                                                                                             "holly" 
##                                                                                      effective_user 
##                                                                                             "holly"
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
##  [1] grid      parallel  stats4    stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] forcats_0.5.1                          
##  [2] stringr_1.4.0                          
##  [3] dplyr_1.0.7                            
##  [4] purrr_0.3.4                            
##  [5] readr_1.4.0                            
##  [6] tibble_3.1.2                           
##  [7] tidyverse_1.3.1                        
##  [8] tidyr_1.1.3                            
##  [9] rlang_0.4.11                           
## [10] maxstat_0.7-25                         
## [11] data.table_1.14.0                      
## [12] png_0.1-7                              
## [13] formattable_0.2.1                      
## [14] broom_0.7.8                            
## [15] scales_1.1.1                           
## [16] gtable_0.3.0                           
## [17] genomation_1.24.0                      
## [18] ChIPseeker_1.28.3                      
## [19] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2
## [20] GenomicFeatures_1.44.0                 
## [21] msigdbr_7.4.1                          
## [22] org.Hs.eg.db_3.13.0                    
## [23] AnnotationDbi_1.54.1                   
## [24] clusterProfiler_4.0.0                  
## [25] ConsensusClusterPlus_1.56.0            
## [26] glmnet_4.1-2                           
## [27] Matrix_1.3-4                           
## [28] survminer_0.4.9                        
## [29] ggpubr_0.4.0                           
## [30] DESeq2_1.32.0                          
## [31] SummarizedExperiment_1.22.0            
## [32] Biobase_2.52.0                         
## [33] MatrixGenerics_1.4.0                   
## [34] matrixStats_0.59.0                     
## [35] GenomicRanges_1.44.0                   
## [36] GenomeInfoDb_1.28.1                    
## [37] IRanges_2.26.0                         
## [38] S4Vectors_0.30.0                       
## [39] BiocGenerics_0.38.0                    
## [40] ggfortify_0.4.11                       
## [41] pheatmap_1.0.12                        
## [42] magrittr_2.0.1                         
## [43] ggbeeswarm_0.6.0                       
## [44] ggrepel_0.9.1                          
## [45] RColorBrewer_1.1-2                     
## [46] cowplot_1.1.1                          
## [47] patchwork_1.1.1                        
## [48] magick_2.7.2                           
## [49] gridExtra_2.3                          
## [50] Hmisc_4.5-0                            
## [51] ggplot2_3.3.5                          
## [52] Formula_1.2-4                          
## [53] survival_3.2-11                        
## [54] lattice_0.20-44                        
## [55] plyr_1.8.6                             
## [56] BiocStyle_2.20.2                       
## 
## loaded via a namespace (and not attached):
##   [1] rappdirs_0.3.3           rtracklayer_1.52.0       exactRankTests_0.8-32   
##   [4] bit64_4.0.5              knitr_1.33               DelayedArray_0.18.0     
##   [7] rpart_4.1-15             KEGGREST_1.32.0          RCurl_1.98-1.3          
##  [10] generics_0.1.0           RSQLite_2.2.7            shadowtext_0.0.8        
##  [13] bit_4.0.4                enrichplot_1.12.2        lubridate_1.7.10        
##  [16] xml2_1.3.2               assertthat_0.2.1         viridis_0.6.1           
##  [19] xfun_0.24                hms_1.1.0                babelgene_21.4          
##  [22] evaluate_0.14            fansi_0.5.0              restfulr_0.0.13         
##  [25] progress_1.2.2           caTools_1.18.2           dbplyr_2.1.1            
##  [28] readxl_1.3.1             km.ci_0.5-2              igraph_1.2.6            
##  [31] DBI_1.1.1                geneplotter_1.70.0       htmlwidgets_1.5.3       
##  [34] ellipsis_0.3.2           crosstalk_1.1.1          backports_1.2.1         
##  [37] bookdown_0.22.3          markdown_1.1             annotate_1.70.0         
##  [40] gridBase_0.4-7           biomaRt_2.48.2           vctrs_0.3.8             
##  [43] abind_1.4-5              cachem_1.0.5             withr_2.4.2             
##  [46] ggforce_0.3.3            BSgenome_1.60.0          checkmate_2.0.0         
##  [49] GenomicAlignments_1.28.0 treeio_1.16.1            prettyunits_1.1.1       
##  [52] cluster_2.1.2            DOSE_3.18.1              mltools_0.3.5           
##  [55] ape_5.5                  lazyeval_0.2.2           crayon_1.4.1            
##  [58] genefilter_1.74.0        labeling_0.4.2           pkgconfig_2.0.3         
##  [61] tweenr_1.0.2             nlme_3.1-152             vipor_0.4.5             
##  [64] nnet_7.3-16              lifecycle_1.0.0          downloader_0.4          
##  [67] filelock_1.0.2           BiocFileCache_2.0.0      modelr_0.1.8            
##  [70] seqPattern_1.24.0        cellranger_1.1.0         polyclip_1.10-0         
##  [73] aplot_0.0.6              KMsurv_0.1-5             carData_3.0-4           
##  [76] boot_1.3-28              zoo_1.8-9                reprex_2.0.0            
##  [79] base64enc_0.1-3          beeswarm_0.4.0           viridisLite_0.4.0       
##  [82] rjson_0.2.20             bitops_1.0-7             KernSmooth_2.23-20      
##  [85] Biostrings_2.60.1        blob_1.2.1               shape_1.4.6             
##  [88] pdftools_3.0.1           qvalue_2.24.0            qpdf_1.1                
##  [91] jpeg_0.1-8.1             rstatix_0.7.0            ggsignif_0.6.2          
##  [94] memoise_2.0.0            gplots_3.1.1             zlibbioc_1.38.0         
##  [97] compiler_4.1.0           scatterpie_0.1.6         BiocIO_1.2.0            
## [100] plotrix_3.8-1            cli_3.0.0                Rsamtools_2.8.0         
## [103] XVector_0.32.0           htmlTable_2.2.1          MASS_7.3-54             
## [106] tidyselect_1.1.1         stringi_1.6.2            highr_0.9               
## [109] yaml_2.2.1               GOSemSim_2.18.0          askpass_1.1             
## [112] locfit_1.5-9.4           latticeExtra_0.6-29      survMisc_0.5.5          
## [115] fastmatch_1.1-0          tools_4.1.0              rio_0.5.27              
## [118] rstudioapi_0.13          foreach_1.5.1            foreign_0.8-81          
## [121] farver_2.1.0             ggraph_2.0.5             digest_0.6.27           
## [124] rvcheck_0.1.8            BiocManager_1.30.16      ggtext_0.1.1            
## [127] gridtext_0.1.4           Rcpp_1.0.6               car_3.0-11              
## [130] httr_1.4.2               colorspace_2.0-2         rvest_1.0.0             
## [133] fs_1.5.0                 XML_3.99-0.6             splines_4.1.0           
## [136] tidytree_0.3.4           graphlayouts_0.7.1       xtable_1.8-4            
## [139] jsonlite_1.7.2           ggtree_3.0.2             tidygraph_1.2.0         
## [142] R6_2.5.0                 pillar_1.6.1             htmltools_0.5.1.1       
## [145] DT_0.18                  glue_1.4.2               fastmap_1.1.0           
## [148] BiocParallel_1.26.1      codetools_0.2-18         fgsea_1.18.0            
## [151] mvtnorm_1.1-2            utf8_1.2.1               curl_4.3.2              
## [154] gtools_3.9.2             zip_2.2.0                GO.db_3.13.0            
## [157] openxlsx_4.2.4           rmarkdown_2.9            munsell_0.5.0           
## [160] DO.db_2.9                GenomeInfoDbData_1.2.6   iterators_1.0.13        
## [163] impute_1.66.0            haven_2.4.1              reshape2_1.4.4

In this sub-vignette we present the analysis and source code for Figure 4. This sub-vignette can be built along with all others, to generate all paper figures, by running CLLCytokineScreen2021.Rmd.

22 Set up

set.seed(1996)

Load libraries

library(formattable)
library(plyr)
library(clusterProfiler)
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
library(ChIPseeker)
library(genomation)
library(ggbeeswarm)
library(ggpubr)
library(ggplot2)
library(patchwork)
library(msigdbr)
library(png)
library(cowplot)
library(tidyverse)
library(Hmisc)

Set plot directory

plotDir = ifelse(exists(".standalone"), "", "../../inst/figs/")
if(plotDir!="") if(!file.exists(plotDir)) dir.create(plotDir)

23 Load data

Load raw files

#df: tibblecontaining all screening viability data
load( "../../data/df.RData")

#patMeta: tibble containing all patient genetic data
load( "../../data/patMeta.RData")


#diffTF_large: tibble containing diffTF weighted mean difference values for 636 TFs, for trisomy 12 versus non-trisomy 12 CLL ATACseq data
load( "../../data/diffTF_large.RData")


#ChIPseq data for SPIB and PU1 binding in OCILY3 cell line (downloaded from GEO)

#get info on dataset of interest
chipSeq <- getGEOInfo(genome="hg19", simplify=FALSE) %>% dplyr::filter(series_id=="GSE56857")

#Define gsm id for SPIB and PU1 ChIPseq dataset in OCILY3 cell line (DLBCL)
gsm_spib = "GSM1370276"
gsm_pu1 = "GSM1370275"                                                         

#download bed file if not already there                                          
downloadGSMbedFiles(gsm_spib, destDir="../../inst/extdata/")
downloadGSMbedFiles(gsm_pu1, destDir="../../inst/extdata/")

#read the data
#ChIPseq data downloaded from GEO Accession Number: GSE56857
peak.spib <-  genomation ::readBed("../../inst/extdata/GSM1370276_OCILY3_SPIB_GEM_events.bed.gz", track.line = "auto")
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   X1 = col_character(),
##   X2 = col_double(),
##   X3 = col_double(),
##   X4 = col_character(),
##   X5 = col_double()
## )
peak.pu1 <-  genomation ::readBed( "../../inst/extdata/GSM1370275_OCILY3_PU1_GEM_events.bed.gz", track.line = "auto")
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   X1 = col_character(),
##   X2 = col_double(),
##   X3 = col_double(),
##   X4 = col_character(),
##   X5 = col_double()
## )
#SPIB_KD_Cellcounts: a tibble containing Normalized Cell Counts for Cell Lines infected with shRNA against SPIB and PU1
load( "../../data/SPIB_KD_Cellcounts.RData")
#df: tibble containing all screening viability data
df <- read.table(file= "../../inst/extdata/df.txt",header = TRUE) %>% as_tibble()

#patMeta: tibble containing all patient genetic data
patMeta <- read.table(file= "../../inst/extdata/patMeta.txt",header = TRUE) %>% as_tibble()


#diffTF_large: tibble containing diffTF weighted mean difference values for 636 TFs, for trisomy 12 versus non-trisomy 12 CLL ATACseq data
diffTF_large <- read_tsv("../../inst/extdata/BIGCLL52.permutations.summary.tsv.gz")


#ChIPseq data for SPIB and PU1 binding in OCILY3 cell line (downloaded from GEO)
#get info on dataset of interest
chipSeq <- getGEOInfo(genome="hg19", simplify=FALSE) %>% dplyr::filter(series_id=="GSE56857")

#Define gsm id for SPIB and PU1 ChIPseq dataset in OCILY3 cell line (DLBCL)
gsm_spib = "GSM1370276"
gsm_pu1 = "GSM1370275"                                                         

#download bed file if not already there                                          
downloadGSMbedFiles(gsm_spib, destDir="../../inst/extdata/")
downloadGSMbedFiles(gsm_pu1, destDir="../../inst/extdata/")

#read the data
peak.spib <-  genomation ::readBed("../../inst/extdata/GSM1370276_OCILY3_SPIB_GEM_events.bed.gz", track.line = "auto")
peak.pu1 <-  genomation ::readBed( "../../inst/extdata/GSM1370275_OCILY3_PU1_GEM_events.bed.gz", track.line = "auto")


#SPIB_KD_Cellcounts: a tibble containing Normalized Cell Counts for Cell Lines infected with shRNA against SPIB and PU1
SPIB_KD_Cellcounts <- read.table(file= "../../inst/extdata/SPIB_KD_Cellcounts.txt",header = TRUE) %>% as_tibble()



#Arrangedata
#df
df$Cytokine <- factor(df$Cytokine, levels= c("No Cytokine",
                                              "Resiquimod", 
                                              "IL-4", 
                                              "TGF-b1",
                                              "IL-1b",
                                              "Interferon gamma",
                                              "SDF-1a",
                                              "sCD40L" ,
                                              "sCD40L+IL-4",
                                              "soluble anti-IgM", 
                                              "CpG ODN",
                                              "IL-6",
                                              "IL-10",
                                              "IL-21",
                                              "HS-5 CM", 
                                              "IL-15",
                                              "BAFF",
                                              "IL-2" ))

#patMeta

patMeta <- patMeta %>%mutate_at(vars(-PatientID), as.factor)

Process files

#get Cytokine only data
df <- dplyr::filter(df, 
                    Drug == "DMSO", 
                    Cytokine != "No Cytokine")

24 Define Aesthetics

source("../../R/themes_colors.R")

25 Plot Figures

25.1 Figure 4A

Boxplot showing response to TLR agonist Resiquimod, stratified by IGHV status and Trisomy 12

#add genetic data to viability dataframe
df_patmeta <- left_join(df, patMeta, by = "PatientID")

#filter for Resiquimod-only treatment, only show patients who are annoyated for IGHV and Trisomy12
plotTab <- df_patmeta %>% 
  dplyr::filter(Cytokine=="Resiquimod",
                !is.na(trisomy12),
                !is.na(IGHV.status))

Fig4A =  
 
  plotTab %>%
         
  ggplot(aes(x=interaction(IGHV.status, trisomy12),
                    y=Log,
                    color=(trisomy12)))+
  geom_hline(yintercept = 0)+
  geom_boxplot()+
  geom_beeswarm(cex=1.5) +
  guides(color="none", shape="none")+
  scale_x_discrete(labels=c("M.0"="IGHV-M\n WT",
                            "U.0"="IGHV-U\n WT",
                            "M.1"="IGHV-M\n trisomy 12",
                            "U.1"="IGHV-U\n trisomy 12"))+
  stat_compare_means(method = "t.test",
                     label.x.npc = "center", 
                     comparisons = list( c(1,2), c(3,4), c(1,3)),
                     size=7)+
  xlab("") +
  ylab("Log(Viability)") +
  ggtitle("Resiquimod") +
  scale_color_manual(values=c(palblues[3], palreds[8])) + 
  coord_cartesian(ylim = c(-1.5, 3.1), clip="off") +
  t1+
  theme(axis.text.x = element_text(angle = 45, vjust =1))


rm(plotTab)

Fig4A

25.2 Figure 4B

TF activity in trisomy 12 and non-trisomy12 samples

#Get all TFs that show differential activity
plotTab.diffTF <- dplyr::filter(diffTF_large, pvalueAdj < 0.05)

#change to name of TF quoted in text
plotTab.diffTF$TF <- gsub("SPI1", "PU1", plotTab.diffTF$TF)

#order by weighted mean difference
idx <- order(plotTab.diffTF[["weighted_meanDifference"]], decreasing = TRUE)
plotTab.diffTF$TF <- factor(plotTab.diffTF$TF,levels=rev(unique(plotTab.diffTF$TF[idx])))


#plot figure
Fig4B <-
  ggplot(plotTab.diffTF,
         aes(x=weighted_meanDifference,
             y=TF,
             fill= ifelse(weighted_meanDifference>0, 
                          palreds[5], palblues[1]))) +
  geom_bar(stat = "identity", width = 0.75) +
  xlim(c(-0.2, 0.175)) +
  
  #colours and theme
  scale_fill_manual(values = c(palblues[1], palreds[8]), guide = "none") +
  t1 + 
  theme(axis.text.x = element_text(angle = 45, vjust =1))+
  
  #Labels
  ylab("") + 
  xlab("Change in TF activity") +
  ggtitle("Diffential TF activity in trisomy 12 CLL") + 
  
  #Add annotations
  #arrow 1
  annotate("segment", x = -0.15, xend = -0.15, y = 8.4, yend = 0.75, 
           colour = "#5e5e5e", size=3, alpha=1, arrow=arrow()) + 
  #arrow 2
  annotate("segment", x = -0.15, xend = -0.15, y = 8.6, yend = 17, 
           colour = "#5e5e5e", size=3, alpha=1, arrow=arrow()) + 
 
   #2 arrow labels
  annotate("text", x = c(-0.185,-0.185), y = c(4.8,12.5), 
          label = c("Lower activity in trisomy 12 CLL", 
                    "Higher activity in trisomy 12 CLL") , 
          color="black", 
          size=5 , fontface="bold") +

  #flip plot
  coord_flip() 
  

  

Fig4B

25.3 Figure 4C

SPIB and PU1 ChIPseq analysis and enrichment of cytokine signalling genes near to SPIB and PU1 binding site
Here, for Spi-B and PU1 ChIP peaks in an OCILY3 DLBCL cell line, we use the clusterProfiler package to annotate the nearest gene for each significant ChIPpeak (q<0.05). We filter the resulting gene list for those with a peak within ±1kb of the TSS and tested for over-representation of selected KEGG and Reactome pathways.

Get significant peaks

#filter ChipSeq data for peaks with significant score -log10(Qscore)>1.3
#if q < 0.05, then Qlog >1.3
spib.filtered <- peak.spib[(elementMetadata(peak.spib)[,1] > 1.3)]
pu1.filtered <- peak.pu1[(elementMetadata(peak.pu1)[,1] > 1.3)]

Annotate ChIP peaks

# Annotate peaks with meta data
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene

peakAnno.spib <- annotatePeak(spib.filtered, 
                              tssRegion = c(-3000, 3000),
                              TxDb = txdb, 
                              annoDb = "org.Hs.eg.db")
## >> preparing features information...      2021-07-20 15:44:43 
## >> identifying nearest features...        2021-07-20 15:44:45 
## >> calculating distance from peak to TSS...   2021-07-20 15:44:46 
## >> assigning genomic annotation...        2021-07-20 15:44:46 
## >> adding gene annotation...          2021-07-20 15:45:03
## 'select()' returned 1:many mapping between keys and columns
## >> assigning chromosome lengths           2021-07-20 15:45:03 
## >> done...                    2021-07-20 15:45:03
peakAnno.pu1 <- annotatePeak(pu1.filtered, 
                             tssRegion = c(-3000, 3000),
                             TxDb = txdb, 
                             annoDb = "org.Hs.eg.db")
## >> preparing features information...      2021-07-20 15:45:03 
## >> identifying nearest features...        2021-07-20 15:45:03 
## >> calculating distance from peak to TSS...   2021-07-20 15:45:04 
## >> assigning genomic annotation...        2021-07-20 15:45:04 
## >> adding gene annotation...          2021-07-20 15:45:05
## 'select()' returned 1:many mapping between keys and columns
## >> assigning chromosome lengths           2021-07-20 15:45:05 
## >> done...                    2021-07-20 15:45:05
#Filter for genes within ±1kb
peakAnno.spib.1kb <- as.data.frame(peakAnno.spib) %>% dplyr::filter(abs(distanceToTSS) <1000)
peakAnno.pu1.1kb <- as.data.frame(peakAnno.pu1) %>% dplyr::filter(abs(distanceToTSS) <1000)

Download KEGG and Reactome pathways of interest for running overrepresentation tests

#get  KEGG pathways from msigdbr database
kegg <- 
  msigdbr(species = "Homo sapiens", category = "C2", subcategory = "CP:KEGG") %>% 
  #select pathway name and entrez id 
  dplyr::select(gs_name, entrez_gene)


#subset for pathways that we want to test
kegg <-
  dplyr::filter(kegg, 
         #All KEGG immune signaling pathways
         gs_name %in% c("KEGG_B_CELL_RECEPTOR_SIGNALING_PATHWAY",
                        "KEGG_CHEMOKINE_SIGNALING_PATHWAY",
                        "KEGG_CYTOKINE_CYTOKINE_RECEPTOR_INTERACTION", 
                        "KEGG_JAK_STAT_SIGNALING_PATHWAY" ,
                        "KEGG_NOD_LIKE_RECEPTOR_SIGNALING_PATHWAY"  , 
                        "KEGG_NOTCH_SIGNALING_PATHWAY", 
                        "KEGG_RIG_I_LIKE_RECEPTOR_SIGNALING_PATHWAY", 
                        "KEGG_TOLL_LIKE_RECEPTOR_SIGNALING_PATHWAY",
                        "KEGG_T_CELL_RECEPTOR_SIGNALING_PATHWAY",
                        #Control genesets
                        "KEGG_OXIDATIVE_PHOSPHORYLATION",                  
                        "KEGG_DNA_REPLICATION"
         ))

colnames(kegg) <- c("term", "gene")


#tidy up terms - remove KEGG, underscore and put in lower cases
kegg$term <- gsub("KEGG_", "",kegg$term )
kegg$term <- gsub("_", " ",kegg$term )
kegg$term <- gsub("B CELL RECEPTOR SIGNALING PATHWAY", 
                  "BCR Signaling Pathway",kegg$term )
kegg$term <- gsub("CHEMOKINE SIGNALING PATHWAY", 
                  "Chemokine Signaling Pathway",kegg$term )
kegg$term <- gsub("CYTOKINE CYTOKINE RECEPTOR INTERACTION", 
                  "Cytokine Cytokine Receptor Interaction",kegg$term )
kegg$term <- gsub("JAK STAT SIGNALING PATHWAY", 
                  "JAK STAT signaling pathway",kegg$term )
kegg$term <- gsub("NOD LIKE RECEPTOR SIGNALING PATHWAY", 
                  "NLR Signaling Pathway",kegg$term )
kegg$term <- gsub("NOTCH SIGNALING PATHWAY", 
                  "Notch Signaling Pathway",kegg$term )
kegg$term <- gsub("RIG I LIKE RECEPTOR SIGNALING PATHWAY", 
                  "RLR Signaling Pathway",kegg$term )
kegg$term <- gsub("TOLL LIKE RECEPTOR SIGNALING PATHWAY", 
                  "TLR Signaling Pathway",kegg$term )

kegg$term <- gsub("T CELL RECEPTOR SIGNALING PATHWAY", 
                  "TCR Signaling Pathway",kegg$term )
kegg$term <- gsub("OXIDATIVE PHOSPHORYLATION", 
                  "Oxidative Phosphorylation",kegg$term )
kegg$term <- gsub("P53 SIGNALING PATHWAY", 
                  "P53 Signaling Pathway",kegg$term )
kegg$term <- gsub("DNA REPLICATION", 
                  "DNA Replication",kegg$term )

kegg$genesetDatabase <- "KEGG"


#get reactome pathways
reactome <- 
  msigdbr(species = "Homo sapiens", category = "C2", subcategory = "CP:REACTOME") %>% 
  #select pathway name and entrez id
  dplyr::select(gs_name, entrez_gene)

#subset for pathways that we want to test
reactome <-
  dplyr::filter(reactome, 
         gs_name %in% c(#selected immune pathways
                        "REACTOME_SIGNALING_BY_TGF_BETA_RECEPTOR_COMPLEX",
                        "REACTOME_TNF_SIGNALING",
                        "REACTOME_INTERLEUKIN_1_SIGNALING",
                        "REACTOME_INTERLEUKIN_10_SIGNALING",
                        "REACTOME_INTERLEUKIN_15_SIGNALING",
                        "REACTOME_INTERLEUKIN_2_SIGNALING",
                        "REACTOME_INTERLEUKIN_4_AND_INTERLEUKIN_13_SIGNALING",
                        "REACTOME_INTERLEUKIN_6_SIGNALING" ,
                        "REACTOME_OTHER_INTERLEUKIN_SIGNALING",
                        "REACTOME_SIGNALING_BY_INTERLEUKINS",
                        "REACTOME_INTERFERON_ALPHA_BETA_SIGNALING" ,
                        "REACTOME_INTERFERON_GAMMA_SIGNALING", 
                        #Control genesets
                        "REACTOME_CELL_CYCLE",                  
                        "REACTOME_REGULATION_OF_TP53_ACTIVITY" 
                        ))

colnames(reactome) <- c("term", "gene")

#tidy up terms
reactome$term <- gsub("REACTOME_", "",reactome$term )
reactome$term <- gsub("_", " ",reactome$term )


#tidy up and put in lower case
reactome$term <- gsub("CELL CYCLE", 
                      "Cell Cycle",reactome$term )
reactome$term <- gsub("INTERLEUKIN 1 SIGNALING", 
                      "Interleukin 1 Signaling",reactome$term )
reactome$term <- gsub("INTERLEUKIN 10 SIGNALING", 
                      "Interleukin 10 Signaling",reactome$term )
reactome$term <- gsub("INTERLEUKIN 15 SIGNALING", 
                      "Interleukin 15 Signaling",reactome$term )
reactome$term <- gsub("INTERLEUKIN 2 SIGNALING", 
                      "Interleukin 2 Signaling",reactome$term )
reactome$term <- gsub("INTERLEUKIN 4 AND INTERLEUKIN 13 SIGNALING", 
                      "Interleukin 4 and Interleukin 13 Signaling",reactome$term )
reactome$term <- gsub("INTERLEUKIN 6 SIGNALING",
                      "Interleukin 6 Signaling",reactome$term )
reactome$term <- gsub("OTHER INTERLEUKIN SIGNALING", 
                      "Other Interleukin Signaling",reactome$term )
reactome$term <- gsub("SIGNALING BY INTERLEUKINS", 
                      "Signaling by Interleukins",reactome$term )
reactome$term <- gsub("REGULATION OF TP53 ACTIVITY", 
                      "Regulation of TP53 Pathway",reactome$term )
reactome$term <- gsub("SIGNALING BY TGF BETA RECEPTOR COMPLEX",
                      "Signaling by TGFbeta \nReceptor Complex", reactome$term )
reactome$term <- gsub("TNF SIGNALING", 
                      "TNF Signaling",reactome$term )
reactome$term <- gsub("INTERFERON ALPHA BETA SIGNALING", 
                      "Interferon Alpha Beta Signaling",reactome$term )

reactome$term <- gsub("INTERFERON GAMMA SIGNALING", 
                      "Interferon Gamma Signaling", reactome$term )

#annotate which database these pathways come from 
reactome$genesetDatabase <- "Reactome"

Run over-representation test forSPIB

#generate TERM2GENE dataframe to use in overrepresetation test
#join kegg and reactome gene sets together - this defines the list of pathways we are interested in, and select term and gene columns
term2gene <- rbind(kegg, reactome) %>% dplyr::select(term, gene)

#term2genesetdatabase - keep details of database source for each pathway 
term2genesetdatabase <- rbind(kegg, reactome) %>% dplyr::select(term, genesetDatabase)
colnames(term2genesetdatabase) <- c("term", "name")

##SPIB
#run over-representation test, and extract all results and p values:
#NB Bg ratio = number of genes in set / number of unique genes in term2gene, 
#NB Gene ratio = number of SPIB targets in gene set being tested / total number of SPIB targets that overlap with genes in term to gene

spibRes <- enricher(unique(as.data.frame(peakAnno.spib.1kb)$geneId), 
                    #Use pathways in term2gene
                    TERM2GENE=term2gene, 
                    #assign description for each term, as its database source
                    TERM2NAME = term2genesetdatabase, 
                    #no cutoffs on reported results, we want to see all results and p values 
                    minGSSize = 0,
                    maxGSSize = 1000,
                    pvalueCutoff = 1, 
                    qvalueCutoff = 1)


#get dataframe of results
spibRes.df <- fortify(spibRes, 
              showCategory = length(unique(term2gene$term)), #show all pathways tested
              split=NULL)

resTab.spib <-  
  mutate(spibRes.df, genesetSize = BgRatio*length(unique(term2gene$gene))) %>%  #multiply BgRatio by total genes tested to get number of genes in set
  dplyr::select(ID, Description, pvalue, Count, genesetSize)

colnames(resTab.spib) <- c("ID", "genesetDatabase", "pvalue_SPIB","count_SPIB", "genesetSize")

Run over-representation test for PU1

##PU1
#run overepresentation test, and extract all results and pvalues (ie use p value and q value cutoff at 1) 
#NB not all results are shown because for some genesets there is no overlap between pu1 targets and geneset
pu1Res <- enricher(unique(as.data.frame(peakAnno.pu1.1kb)$geneId), 
                    TERM2GENE=term2gene, 
                    TERM2NAME = term2genesetdatabase, 
                    minGSSize = 0,
                    maxGSSize = 10000,
                    pvalueCutoff = 1, 
                    qvalueCutoff = 1)


#get dataframe of results
pu1Res.df <- fortify(pu1Res, 
              showCategory = length(unique(term2gene$term)), #show all pathways tested 
              split=NULL)


resTab.pu1 <-  mutate(pu1Res.df, genesetSize = BgRatio*length(unique(term2gene$gene))) %>%  #multiply BgRatio by total genes tested to get number of genes in set
  dplyr::select(ID, Description, pvalue, Count, genesetSize)

colnames(resTab.pu1) <- c("ID", "genesetDatabase", "pvalue_PU1","count_PU1", "genesetSize")

Join results and make figure

#join results for SPIB and PU1 together to show in one table
resTab <- full_join(resTab.spib, resTab.pu1)
## Joining, by = c("ID", "genesetDatabase", "genesetSize")
#where count and p value is NA, put a 0 / 1 (ie there were no PU1/SPIB target genes in the set)
resTab$count_PU1[is.na(resTab$count_PU1)] <- 0
resTab$count_SPIB[is.na(resTab$count_SPIB)] <- 0

resTab$pvalue_SPIB[is.na(resTab$pvalue_SPIB)] <- 1
resTab$pvalue_PU1[is.na(resTab$pvalue_PU1)] <- 1

#select columns of interest to show in figure
resTab <- dplyr::select(resTab, ID, genesetDatabase, genesetSize,  count_SPIB, pvalue_SPIB,count_PU1, pvalue_PU1)

#adjust decimal places of p values
resTab$pvalue_SPIB <- round(resTab$pvalue_SPIB, digits = 3)
resTab$pvalue_PU1 <- round(resTab$pvalue_PU1, digits = 3)

#get significant results
resTab <- dplyr::filter(resTab, pvalue_SPIB<0.01|pvalue_PU1<0.01)

colnames(resTab) <- c("Pathway",
                      "Geneset Database" , 
                      "Geneset Size",  
                      #show the total number of SPIB targets in ChIPseq data 
                      paste("Spi-B targets (/" ,length(unique(as.data.frame(peakAnno.spib.1kb)$geneId)),")", sep= ""), 
                      "SPIB p-value", 
                      #show the total number of PU1 targets in ChIPseq data  
                      paste("PU.1 targets (/" ,length(unique(as.data.frame(peakAnno.pu1.1kb)$geneId)),")",sep= ""), 
                      "PU1 p-value")

Fig4C<- 
ggtexttable(resTab, rows = NULL, 
           theme = ttheme(
             colnames.style = colnames_style(color = "white", fill = palreds[7], size = 16),
             tbody.style = tbody_style(color = "black",size = 18)
           )
)


Fig4C

25.4 Figure 4D

Fig4D <-
  SPIB_KD_Cellcounts %>% 
  # filter(Experiment=="Single_KD") %>% 
  dplyr::filter(treatment%in%c("scr_shRNA", "PU.1-KD","SpiB-KD", "Double-KD")) %>% 
  mutate(treatment=factor(treatment, levels = c("scr_shRNA", "PU.1-KD", "SpiB-KD", "Double-KD"))) %>% 
  
  
  
  ggplot(aes(x=Day, y=NormalizedCellCount, color=treatment, group=interaction( treatment)))+

  stat_summary(fun = mean,geom='line',aes(color=treatment), size=1) +

  stat_summary(fun=mean,geom='point', size=2) +
  stat_summary(fun.data=mean_cl_boot,geom='errorbar',width=0.2, size=0.5) +
  facet_wrap(~Cellline, scales="fixed")+
  scale_y_log10()+
  labs(y="Relative Cellcount")+
 t2+
  theme(legend.position = "bottom", legend.key = element_blank(), legend.title = element_text(face='bold', size=fontsize+4),  legend.text = element_text(size=fontsize+2), strip.background = element_blank(), strip.text = element_text(size=18, face="bold"))+
      scale_color_manual(name = "Condition", values = c("darkgrey", colors[c(1,3,4)]), labels=c("scr_shRNA"="Control","PU.1-KD"="PU.1-Knockdown","SpiB-KD"="Spi-B-Knockdown","Double-KD"="Spi-B + PU.1 Double Knockdown" ))

Fig4D

26 Assemble Figure 4

design1<-"
  AB
  CC
  DD
"

tp <- theme(plot.tag=element_text(size = 30, face="plain"))

Fig4 <-

  Fig4A + tp +  
  Fig4B + tp + 
  Fig4C + tp +
  Fig4D + tp +
    
  plot_annotation(tag_levels = "A")+
  plot_layout(design = design1, heights = c(1,0.6,0.8), width=c(0.6,1))

Fig4

27 Appendix

Sys.info()
##                                                                                             sysname 
##                                                                                            "Darwin" 
##                                                                                             release 
##                                                                                            "19.6.0" 
##                                                                                             version 
## "Darwin Kernel Version 19.6.0: Thu May  6 00:48:39 PDT 2021; root:xnu-6153.141.33~1/RELEASE_X86_64" 
##                                                                                            nodename 
##                                               "mac-huber20.academic.christs.cam.ac.uk.pool.embl.de" 
##                                                                                             machine 
##                                                                                            "x86_64" 
##                                                                                               login 
##                                                                                             "holly" 
##                                                                                                user 
##                                                                                             "holly" 
##                                                                                      effective_user 
##                                                                                             "holly"
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
##  [1] grid      parallel  stats4    stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] forcats_0.5.1                          
##  [2] stringr_1.4.0                          
##  [3] dplyr_1.0.7                            
##  [4] purrr_0.3.4                            
##  [5] readr_1.4.0                            
##  [6] tibble_3.1.2                           
##  [7] tidyverse_1.3.1                        
##  [8] tidyr_1.1.3                            
##  [9] rlang_0.4.11                           
## [10] maxstat_0.7-25                         
## [11] data.table_1.14.0                      
## [12] png_0.1-7                              
## [13] formattable_0.2.1                      
## [14] broom_0.7.8                            
## [15] scales_1.1.1                           
## [16] gtable_0.3.0                           
## [17] genomation_1.24.0                      
## [18] ChIPseeker_1.28.3                      
## [19] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2
## [20] GenomicFeatures_1.44.0                 
## [21] msigdbr_7.4.1                          
## [22] org.Hs.eg.db_3.13.0                    
## [23] AnnotationDbi_1.54.1                   
## [24] clusterProfiler_4.0.0                  
## [25] ConsensusClusterPlus_1.56.0            
## [26] glmnet_4.1-2                           
## [27] Matrix_1.3-4                           
## [28] survminer_0.4.9                        
## [29] ggpubr_0.4.0                           
## [30] DESeq2_1.32.0                          
## [31] SummarizedExperiment_1.22.0            
## [32] Biobase_2.52.0                         
## [33] MatrixGenerics_1.4.0                   
## [34] matrixStats_0.59.0                     
## [35] GenomicRanges_1.44.0                   
## [36] GenomeInfoDb_1.28.1                    
## [37] IRanges_2.26.0                         
## [38] S4Vectors_0.30.0                       
## [39] BiocGenerics_0.38.0                    
## [40] ggfortify_0.4.11                       
## [41] pheatmap_1.0.12                        
## [42] magrittr_2.0.1                         
## [43] ggbeeswarm_0.6.0                       
## [44] ggrepel_0.9.1                          
## [45] RColorBrewer_1.1-2                     
## [46] cowplot_1.1.1                          
## [47] patchwork_1.1.1                        
## [48] magick_2.7.2                           
## [49] gridExtra_2.3                          
## [50] Hmisc_4.5-0                            
## [51] ggplot2_3.3.5                          
## [52] Formula_1.2-4                          
## [53] survival_3.2-11                        
## [54] lattice_0.20-44                        
## [55] plyr_1.8.6                             
## [56] BiocStyle_2.20.2                       
## 
## loaded via a namespace (and not attached):
##   [1] rappdirs_0.3.3           rtracklayer_1.52.0       exactRankTests_0.8-32   
##   [4] bit64_4.0.5              knitr_1.33               DelayedArray_0.18.0     
##   [7] rpart_4.1-15             KEGGREST_1.32.0          RCurl_1.98-1.3          
##  [10] generics_0.1.0           RSQLite_2.2.7            shadowtext_0.0.8        
##  [13] bit_4.0.4                enrichplot_1.12.2        lubridate_1.7.10        
##  [16] xml2_1.3.2               assertthat_0.2.1         viridis_0.6.1           
##  [19] xfun_0.24                hms_1.1.0                babelgene_21.4          
##  [22] evaluate_0.14            fansi_0.5.0              restfulr_0.0.13         
##  [25] progress_1.2.2           caTools_1.18.2           dbplyr_2.1.1            
##  [28] readxl_1.3.1             km.ci_0.5-2              igraph_1.2.6            
##  [31] DBI_1.1.1                geneplotter_1.70.0       htmlwidgets_1.5.3       
##  [34] ellipsis_0.3.2           crosstalk_1.1.1          backports_1.2.1         
##  [37] bookdown_0.22.3          markdown_1.1             annotate_1.70.0         
##  [40] gridBase_0.4-7           biomaRt_2.48.2           vctrs_0.3.8             
##  [43] abind_1.4-5              cachem_1.0.5             withr_2.4.2             
##  [46] ggforce_0.3.3            BSgenome_1.60.0          checkmate_2.0.0         
##  [49] GenomicAlignments_1.28.0 treeio_1.16.1            prettyunits_1.1.1       
##  [52] cluster_2.1.2            DOSE_3.18.1              mltools_0.3.5           
##  [55] ape_5.5                  lazyeval_0.2.2           crayon_1.4.1            
##  [58] genefilter_1.74.0        labeling_0.4.2           pkgconfig_2.0.3         
##  [61] tweenr_1.0.2             nlme_3.1-152             vipor_0.4.5             
##  [64] nnet_7.3-16              lifecycle_1.0.0          downloader_0.4          
##  [67] filelock_1.0.2           BiocFileCache_2.0.0      modelr_0.1.8            
##  [70] seqPattern_1.24.0        cellranger_1.1.0         polyclip_1.10-0         
##  [73] aplot_0.0.6              KMsurv_0.1-5             carData_3.0-4           
##  [76] boot_1.3-28              zoo_1.8-9                reprex_2.0.0            
##  [79] base64enc_0.1-3          beeswarm_0.4.0           viridisLite_0.4.0       
##  [82] rjson_0.2.20             bitops_1.0-7             KernSmooth_2.23-20      
##  [85] Biostrings_2.60.1        blob_1.2.1               shape_1.4.6             
##  [88] pdftools_3.0.1           qvalue_2.24.0            qpdf_1.1                
##  [91] jpeg_0.1-8.1             rstatix_0.7.0            ggsignif_0.6.2          
##  [94] memoise_2.0.0            gplots_3.1.1             zlibbioc_1.38.0         
##  [97] compiler_4.1.0           scatterpie_0.1.6         BiocIO_1.2.0            
## [100] plotrix_3.8-1            cli_3.0.0                Rsamtools_2.8.0         
## [103] XVector_0.32.0           htmlTable_2.2.1          MASS_7.3-54             
## [106] tidyselect_1.1.1         stringi_1.6.2            highr_0.9               
## [109] yaml_2.2.1               GOSemSim_2.18.0          askpass_1.1             
## [112] locfit_1.5-9.4           latticeExtra_0.6-29      survMisc_0.5.5          
## [115] fastmatch_1.1-0          tools_4.1.0              rio_0.5.27              
## [118] rstudioapi_0.13          foreach_1.5.1            foreign_0.8-81          
## [121] farver_2.1.0             ggraph_2.0.5             digest_0.6.27           
## [124] rvcheck_0.1.8            BiocManager_1.30.16      ggtext_0.1.1            
## [127] gridtext_0.1.4           Rcpp_1.0.6               car_3.0-11              
## [130] httr_1.4.2               colorspace_2.0-2         rvest_1.0.0             
## [133] fs_1.5.0                 XML_3.99-0.6             splines_4.1.0           
## [136] tidytree_0.3.4           graphlayouts_0.7.1       xtable_1.8-4            
## [139] jsonlite_1.7.2           ggtree_3.0.2             tidygraph_1.2.0         
## [142] R6_2.5.0                 pillar_1.6.1             htmltools_0.5.1.1       
## [145] DT_0.18                  glue_1.4.2               fastmap_1.1.0           
## [148] BiocParallel_1.26.1      codetools_0.2-18         fgsea_1.18.0            
## [151] mvtnorm_1.1-2            utf8_1.2.1               curl_4.3.2              
## [154] gtools_3.9.2             zip_2.2.0                GO.db_3.13.0            
## [157] openxlsx_4.2.4           rmarkdown_2.9            munsell_0.5.0           
## [160] DO.db_2.9                GenomeInfoDbData_1.2.6   iterators_1.0.13        
## [163] impute_1.66.0            haven_2.4.1              reshape2_1.4.4

In this sub-vignette we present the analysis and source code for figure 5. This sub-vignette can be built along with all other sub-vignettes, by running CLLCytokineScreen2021.Rmd.

28 Set up

Load Libraries

library(patchwork)
library(ggplot2)
library(data.table)
library(magrittr)
library(ggplot2)
library(pheatmap)
library(tidyr)
library(dplyr)

Set plot directory

plotDir = ifelse(exists(".standalone"), "", "../../inst/figs/")
if(plotDir!="") if(!file.exists(plotDir)) dir.create(plotDir)

29 Load data

Raw

#df: tibble containing all screening viability data
load( "../../data/df.RData")
#from tsvs
df <- read.table(file= "../../inst/extdata/df.txt",header = TRUE) %>% as_tibble()

df$Cytokine <- factor(df$Cytokine, levels= c("No Cytokine",
                                              "Resiquimod", 
                                              "IL-4", 
                                              "TGF-b1",
                                              "IL-1b",
                                              "Interferon gamma",
                                              "SDF-1a",
                                              "sCD40L" ,
                                              "sCD40L+IL-4",
                                              "soluble anti-IgM", 
                                              "CpG ODN",
                                              "IL-6",
                                              "IL-10",
                                              "IL-21",
                                              "HS-5 CM", 
                                              "IL-15",
                                              "BAFF",
                                              "IL-2" ))

df$Drug <- factor(df$Drug, levels =c("DMSO",
                                     "BAY-11-7085",
                                     "Everolimus",
                                     "Fludarabine",
                                     "I-BET 762",
                                     "Ibrutinib","Idelalisib",
                                     "Luminespib",
                                     "Nutlin-3a",
                                     "PRT062607",
                                     "Pyridone-6",
                                     "Ralimetinib",
                                     "Selumetinib"))

Process data

# List of Cytokines, Drugs and treatments 
  ##Drugs
  thedrugs <- unique(df$Drug) 

  ##Cytokines
  thecytokines <- unique(df$Cytokine) 

  #Treatments
  #drugs
  drugtreatments <- list()
  drugtreatments <- 
    lapply(thedrugs, function(x){
      paste("treatment_drug", x, sep='')
      })
  
  
  #cytokines
  cytokinetreatments <- list()
  cytokinetreatments <- 
    lapply(thecytokines, function(y){
      paste("treatment_cytokine", y, sep='')
      })


# Tidy up data frame to use in linear model
df <- 
  dplyr::filter(df,
                #use high concentrations only
                Drug_Concentration %in% c("High","None")) %>% 
  #select columns for linear model
  dplyr::select(PatientID, Drug, Cytokine, Log) %>%  
        
  #rename columns 
  plyr::rename(c("Drug"="treatment_drug", "Cytokine"="treatment_cytokine", "Log"="Viability"))

30 Set aesthetics

source("../../R/themes_colors.R")

31 Pre - work

Fit the linear model

# define the base level per treatment_type as the "no"-treatment
df$treatment_drug <- as.factor(df$treatment_drug)
df$treatment_cytokine <- as.factor(df$treatment_cytokine)

df$treatment_drug %<>% relevel("DMSO")
df$treatment_cytokine %<>% relevel("No Cytokine")


# inspect design matrix of interaction model
# model.matrix(~treatment_drug + treatment_cytokine + treatment_drug:treatment_cytokine, df)

# fit model with interaction
fit <- lm(Viability ~ treatment_drug * treatment_cytokine, df)

Extract p values

#extract p values

pvalues <- summary(fit)$coefficients[,4]

#order as a dataframe
pvaldf <- as.data.frame(as.matrix(pvalues)) %>% 
          setDT(keep.rownames = TRUE)
          
#rename columns
colnames(pvaldf) <- c("treatment", "pvalue")

#filter out single agent treatments
singletreatments <- c(drugtreatments, cytokinetreatments, "(Intercept)")

pvaldf <- dplyr::filter(pvaldf, !treatment %in% singletreatments)


pvaldf <- 
  pvaldf %>% 
  #remove "treatment" prefix
  mutate_at(vars(treatment), funs(as.character(gsub("treatment_drug", "", .)))) %>% 
  mutate_at(vars(treatment), funs(as.character(gsub("treatment_cytokine", "", .))))
## Warning: `funs()` was deprecated in dplyr 0.8.0.
## Please use a list of either functions or lambdas: 
## 
##   # Simple named list: 
##   list(mean = mean, median = median)
## 
##   # Auto named with `tibble::lst()`: 
##   tibble::lst(mean, median)
## 
##   # Using lambdas
##   list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
  #split drug:cytokine into two columns
pvaldf <-   
  data.frame(pvaldf, do.call(rbind, strsplit(pvaldf$treatment, split = ":", fixed = TRUE)))

#renove treatment column
pvaldf <- pvaldf[,c("pvalue","X1", "X2")]

#rename columns
colnames(pvaldf) <- c("pvalue","drug", "Cytokine")

32 Plot Figures

32.1 Fig 5A

Dummy plots

dummydata <-
  as.data.frame(list(c(1,2,3,4),
                     c(0,0,0,0),
                     c(-0.6,-0.6,0,-0.1), 
                     c(0.6,0.6,0.1,-0.1), 
                     c(0.5,-0.5,0.8,-0.8)), 
                col.names=c("plot",
                            "coord1", 
                            "coord2", 
                            "coord3", 
                            "coord4")) %>% 
  as_tibble()%>% 
  
  mutate(expected = coord2 + coord3, measured = coord4) %>% 
  
  pivot_longer(cols = coord1:measured, names_to = c("plot"), values_to="y.value", names_repair="unique" ) %>% 
  
  dplyr::rename(Plot = plot...1, x.value = plot...2) %>% 
  
  mutate(x.value = factor(x.value, levels = c("coord1", "coord2", "coord3", "coord4", "expected", "measured"))) %>% 
  
  #add annotation of interaction type
  mutate(Plot = factor(Plot, 
                       labels = c("stimulus inhibits drug", 
                                  "drug inhibits stimulus", 
                                  "synergistic pro-survival effect", 
                                  "synergistic toxicity")))
## New names:
## * plot -> plot...1
## * plot -> plot...2
gg_dummy =
  #for each interaction type, make a dummy plot
  lapply(unique(dummydata$Plot), function(i){
    
    tmp <- dplyr::filter(dummydata, Plot==i )
    
    #get value for interaction
    interaction = 
      unlist(dplyr::select(dplyr::filter(tmp, x.value=="measured"), y.value)) - 
      unlist(dplyr::select(dplyr::filter(tmp,x.value=="expected"), y.value))
    
    #plot dummy plot
    gg_dummy <- 
      ggplot(dplyr::filter(tmp, grepl('coord', x.value)), 
             aes(x=x.value, y=y.value, group=Plot))+
  
  geom_hline(yintercept = 0, linetype=2, color="black")+
  geom_point(colour=ifelse(interaction > 0, "#A6093D","#003DA5"), size=1)+
  geom_line(colour=ifelse(interaction > 0, "#A6093D","#003DA5"), size=1)+
  
  geom_segment(data=dplyr::filter(tmp, grepl('expected', x.value)), aes(x = 3.5, y = y.value, xend = 4.5, yend = y.value), color="blue", size=2) +
  geom_segment(data=dplyr::filter(tmp, grepl('measured', x.value)), aes(x = 3.5, y = y.value, xend = 4.5, yend = y.value), color="black", size=2)+
  
  theme_bw() +
  t1 +
  xlab("") + 
  ylab("") +
  coord_cartesian(ylim = c(-1, 1)) +
  scale_x_discrete(labels=c( "coord1"="DMSO", "coord2"="Drug","coord3"="Stimulus","coord4"="Drug +\nStimulus")) +
    theme(axis.ticks.x= element_blank(),axis.ticks.y = element_blank(),
          plot.tag=element_text(size = 20)) +
      
  scale_y_continuous(breaks = c(-1,0,1), sec.axis = 
                       sec_axis(trans=~exp(1)^.*100, 
                                breaks=c(50,100,200), 
                                labels=c("50%", "100%", "200%")))

})


#construct plot

Fig5A <- wrap_elements(
  
  gg_dummy[[1]] + 
  labs(title=expression(paste("Positive ", beta["int"] ))) +
  ylab("Antagonism")+

  gg_dummy[[2]] +
  labs(title=expression(paste("Negative ", beta["int"] )))+

  gg_dummy[[3]] +
  ylab("Synergism") +
    
  gg_dummy[[4]]+
  
  plot_layout(nrow=2) +
  plot_annotation(tag_levels = "I")
)

Fig5A

32.2 Fig 5B

Barplot quantifying interaction types

fit_as_df <- 
  fit$coefficients %>% 
  as.matrix() %>% 
  as.data.frame() %>%
  setDT(keep.rownames = TRUE) %>% 
  as_tibble() %>% 
  dplyr::rename(Condition = rn, comb_coefficient = V1) %>% 
  dplyr::filter(grepl("drug", Condition)&grepl("cytokine", Condition)) %>% 
  mutate(Condition = gsub("treatment_drug", "", Condition)) %>% 
  mutate(Condition = gsub("treatment_cytokine", "", Condition)) %>% 
  separate(Condition, c("Drug", "Cytokine"), sep=":")

drug_fit_as_df <- 
  fit$coefficients %>% 
  as.matrix() %>%
  as.data.frame() %>%
  setDT(keep.rownames = TRUE) %>% 
  as_tibble() %>% 
  dplyr::rename(Drug = rn, drug_coefficent=V1) %>% 
  dplyr::filter(grepl("drug", Drug)&!grepl("cytokine", Drug)) %>% 
  mutate(Drug = gsub("treatment_drug", "", Drug))


cyt_fit_as_df <-
  fit$coefficients %>% 
  as.matrix() %>% 
  as.data.frame() %>%
  setDT(keep.rownames = TRUE) %>% 
  as_tibble() %>% 
  dplyr::rename(Cytokine=rn, cyt_coefficent=V1) %>% 
  dplyr::filter(!grepl("drug", Cytokine)&grepl("cytokine", Cytokine)) %>% 
  mutate(Cytokine = gsub("treatment_cytokine", "", Cytokine))


fit_combinations <-
  left_join(fit_as_df, drug_fit_as_df, by = "Drug") %>% 
  left_join(cyt_fit_as_df, by = "Cytokine") %>% 
  mutate(predicted_coefficient=cyt_coefficent+drug_coefficent) %>% 
  mutate(obs_coef=predicted_coefficient+comb_coefficient) %>%
  left_join(mutate(pvaldf,drug=as.character(drug),Cytokine=as.character(Cytokine)), by=c("Drug"="drug", "Cytokine"="Cytokine")) %>% 
  dplyr::filter(pvalue<=0.05) %>%
  mutate(Category_Symbol=case_when(
    comb_coefficient>=0&(obs_coef<cyt_coefficent|obs_coef<drug_coefficent)~ "I",  
    comb_coefficient>=0&(obs_coef>cyt_coefficent&obs_coef>drug_coefficent)~ "III",
    comb_coefficient<0&(obs_coef>cyt_coefficent|obs_coef>drug_coefficent)~ "II",  
    comb_coefficient<0&(obs_coef<cyt_coefficent&obs_coef<drug_coefficent)~ "IV"
    )) %>% 
  mutate(Category=case_when(
    Category_Symbol=="I"~ "Positive Antagonistic",  
    Category_Symbol=="III"~ "Positive Synergistic",
    Category_Symbol=="II"~ "Negative Antagonistic",  
    Category_Symbol=="IV"~ "Negative Synergistic"
    )) %>% 
  mutate( Category=factor(Category, levels=c("Positive Antagonistic","Negative Antagonistic","Positive Synergistic","Negative Synergistic")))
  

Fig5B <-
  fit_combinations %>% 
  dplyr::group_by(Category) %>% 
  tally() %>% 
  
ggplot(aes(x=Category, y=n, fill=Category))+
  geom_bar(width = 1, stat = "identity")+
  t1+
  guides(fill="none")+
  xlab("")+ ylab("")+
  scale_fill_manual(values = c("#A6093D","#003DA5","#A6093D","#003DA5"))

Fig5B

32.3 Fig 5C

Heatmap of significant drug : stimulus interaction coefficients
Get interaction coefficients matrix for plotting heatmap

#extract beta interaction values
betavalues <- summary(fit)$coefficients[,1]

#create matrix of beta values 
betadf <- as.data.frame(as.matrix(betavalues)) %>%
          setDT(keep.rownames = TRUE)

#rename columns
colnames(betadf) <- c("treatment", "betavalue")

#remove beta values for drugs and cytokines alone
betadf <- dplyr::filter(betadf, !treatment %in% singletreatments)

#remove treatment prefix
betadf <- betadf %>% 
  mutate_at(vars(treatment), funs(as.character(gsub("treatment_drug", "", .)))) %>% 
  mutate_at(vars(treatment), funs(as.character(gsub("treatment_cytokine", "", .))))

#split drug:cytokine by colon
betadf <- data.frame(betadf, do.call(rbind, strsplit(betadf$treatment, split = ":", fixed = TRUE)))

#select columns of interest
betadf <- betadf[,c("betavalue","X1", "X2")]

#rename columns 
colnames(betadf) <- c("betavalue","drug", "Cytokine")
#set significance
a <- 0.05

#make a matrix of beta values, where  beta = 0 if corresponding p val is > than significance threshold 
pvalmat <- xtabs(pvalue~drug+Cytokine, data=pvaldf)
bvalmat <- xtabs(betavalue~drug+Cytokine, data=betadf)

#check in same order
bvalmat[,colnames(pvalmat)] 
##              Cytokine
## drug                   BAFF       CpG ODN       HS-5 CM         IL-10
##   BAY-11-7085 -5.604631e-02 -3.181140e-01  1.492971e-03 -1.575975e-02
##   Everolimus   3.655902e-02  1.497761e-02  5.377161e-02  7.948169e-02
##   Fludarabine -2.337342e-02  1.903315e-01  6.711677e-02 -3.913832e-02
##   I-BET 762    4.216039e-02 -2.331726e-01 -7.116831e-02 -2.652204e-02
##   Ibrutinib   -2.625863e-02 -2.269246e-01  7.477386e-02  2.731021e-02
##   Idelalisib   8.275484e-03 -2.652441e-01  3.386327e-02  3.058002e-02
##   Luminespib  -2.482555e-01 -2.591311e-01 -3.940346e-01 -6.458359e-02
##   Nutlin-3a   -1.258362e-03  1.577514e-01  7.700330e-02  9.299536e-02
##   PRT062607   -6.653378e-02 -8.644230e-02  1.557073e-02 -8.013149e-03
##   Pyridone-6  -8.023060e-04 -8.100430e-03  2.925389e-02  3.243689e-02
##   Ralimetinib -3.093962e-02 -1.717127e-01  1.858811e-02 -1.073542e-02
##   Selumetinib  5.947874e-03 -1.304805e-01  7.060345e-02  2.145764e-02
##              Cytokine
## drug                  IL-15         IL-1b          IL-2         IL-21
##   BAY-11-7085 -5.816512e-03  2.152596e-02 -2.134484e-02  2.072687e-03
##   Everolimus   2.725033e-02  3.500697e-02 -2.677401e-03  3.495098e-02
##   Fludarabine -4.310581e-02  3.505833e-02 -2.214672e-02 -4.860353e-02
##   I-BET 762    9.141791e-04  2.740045e-03 -2.907140e-02 -2.523721e-02
##   Ibrutinib    5.752202e-02  1.849707e-03  4.678019e-02  3.710308e-02
##   Idelalisib   5.658295e-02  1.936971e-02  4.584077e-02  3.477902e-02
##   Luminespib  -6.061613e-02 -1.263584e-01 -6.993392e-02 -7.742445e-02
##   Nutlin-3a    1.046024e-01  2.482328e-02  1.463441e-01  7.912615e-02
##   PRT062607   -3.957221e-03 -2.216149e-02  5.916628e-03  8.482059e-03
##   Pyridone-6   5.308392e-02  1.514414e-02  3.040022e-02 -7.237789e-04
##   Ralimetinib  1.202065e-02 -2.568524e-03 -1.468196e-02 -4.467606e-02
##   Selumetinib  5.942633e-02  1.780087e-02  3.092410e-02  2.662341e-02
##              Cytokine
## drug                   IL-4          IL-6 Interferon gamma    Resiquimod
##   BAY-11-7085  2.235761e-02  4.554573e-02     5.434015e-02 -3.639915e-01
##   Everolimus   1.332949e-01  6.817609e-03     5.469766e-02  2.765104e-02
##   Fludarabine  3.953363e-01 -2.469675e-02     4.886592e-02  2.265297e-01
##   I-BET 762   -2.665207e-02 -2.262955e-02     1.357931e-02 -3.265356e-01
##   Ibrutinib    1.836975e-01  6.642272e-02     1.101352e-01 -2.509692e-01
##   Idelalisib   1.657969e-01  6.363235e-02     1.171099e-01 -2.064261e-01
##   Luminespib  -4.429811e-02 -3.220023e-03    -2.221338e-02 -7.116024e-02
##   Nutlin-3a    5.337703e-01  1.136707e-01     1.433726e-01  1.426722e-01
##   PRT062607    1.388865e-01 -1.097500e-02     2.337567e-02 -3.612143e-01
##   Pyridone-6  -2.305619e-02  6.847247e-02     6.271102e-02  1.349796e-03
##   Ralimetinib  7.075108e-02  2.282944e-03     2.190150e-01 -1.118603e-01
##   Selumetinib  1.174186e-01  6.322721e-02     8.349273e-02 -1.148979e-01
##              Cytokine
## drug                 sCD40L   sCD40L+IL-4        SDF-1a soluble anti-IgM
##   BAY-11-7085  2.049505e-02 -1.619583e-01  2.831627e-02    -6.597054e-03
##   Everolimus   5.178276e-02  1.386887e-01 -2.677009e-03     5.541420e-04
##   Fludarabine  1.020548e-02  3.687631e-01  5.075778e-02     3.435734e-02
##   I-BET 762    4.068479e-02  6.576262e-03 -4.944505e-03    -2.502231e-02
##   Ibrutinib   -4.740237e-03  8.873182e-02  2.038604e-02     1.260542e-02
##   Idelalisib   2.027841e-02  8.430112e-02 -2.252409e-05     2.338468e-02
##   Luminespib  -1.543305e-01 -3.482382e-01 -8.004006e-02    -4.275447e-01
##   Nutlin-3a    1.834396e-02  4.643749e-01  1.258999e-01     5.912671e-03
##   PRT062607   -8.838768e-02  1.457566e-01 -2.877775e-02    -6.326385e-02
##   Pyridone-6   3.934846e-02 -1.855970e-01  3.145554e-02    -6.082148e-03
##   Ralimetinib -1.691833e-04 -3.878371e-02  1.575747e-03    -2.273022e-02
##   Selumetinib  5.122307e-02  4.874905e-02 -2.510831e-02     2.421239e-02
##              Cytokine
## drug                 TGF-b1
##   BAY-11-7085 -4.509014e-03
##   Everolimus  -8.310092e-03
##   Fludarabine -4.636054e-02
##   I-BET 762   -9.396045e-02
##   Ibrutinib    1.958423e-02
##   Idelalisib   9.050594e-03
##   Luminespib  -8.478118e-02
##   Nutlin-3a   -1.529464e-02
##   PRT062607    2.369375e-03
##   Pyridone-6   3.081690e-02
##   Ralimetinib  1.357279e-02
##   Selumetinib  2.234705e-02
bvalmat[rownames(pvalmat),]
##              Cytokine
## drug                   BAFF       CpG ODN       HS-5 CM         IL-10
##   BAY-11-7085 -5.604631e-02 -3.181140e-01  1.492971e-03 -1.575975e-02
##   Everolimus   3.655902e-02  1.497761e-02  5.377161e-02  7.948169e-02
##   Fludarabine -2.337342e-02  1.903315e-01  6.711677e-02 -3.913832e-02
##   I-BET 762    4.216039e-02 -2.331726e-01 -7.116831e-02 -2.652204e-02
##   Ibrutinib   -2.625863e-02 -2.269246e-01  7.477386e-02  2.731021e-02
##   Idelalisib   8.275484e-03 -2.652441e-01  3.386327e-02  3.058002e-02
##   Luminespib  -2.482555e-01 -2.591311e-01 -3.940346e-01 -6.458359e-02
##   Nutlin-3a   -1.258362e-03  1.577514e-01  7.700330e-02  9.299536e-02
##   PRT062607   -6.653378e-02 -8.644230e-02  1.557073e-02 -8.013149e-03
##   Pyridone-6  -8.023060e-04 -8.100430e-03  2.925389e-02  3.243689e-02
##   Ralimetinib -3.093962e-02 -1.717127e-01  1.858811e-02 -1.073542e-02
##   Selumetinib  5.947874e-03 -1.304805e-01  7.060345e-02  2.145764e-02
##              Cytokine
## drug                  IL-15         IL-1b          IL-2         IL-21
##   BAY-11-7085 -5.816512e-03  2.152596e-02 -2.134484e-02  2.072687e-03
##   Everolimus   2.725033e-02  3.500697e-02 -2.677401e-03  3.495098e-02
##   Fludarabine -4.310581e-02  3.505833e-02 -2.214672e-02 -4.860353e-02
##   I-BET 762    9.141791e-04  2.740045e-03 -2.907140e-02 -2.523721e-02
##   Ibrutinib    5.752202e-02  1.849707e-03  4.678019e-02  3.710308e-02
##   Idelalisib   5.658295e-02  1.936971e-02  4.584077e-02  3.477902e-02
##   Luminespib  -6.061613e-02 -1.263584e-01 -6.993392e-02 -7.742445e-02
##   Nutlin-3a    1.046024e-01  2.482328e-02  1.463441e-01  7.912615e-02
##   PRT062607   -3.957221e-03 -2.216149e-02  5.916628e-03  8.482059e-03
##   Pyridone-6   5.308392e-02  1.514414e-02  3.040022e-02 -7.237789e-04
##   Ralimetinib  1.202065e-02 -2.568524e-03 -1.468196e-02 -4.467606e-02
##   Selumetinib  5.942633e-02  1.780087e-02  3.092410e-02  2.662341e-02
##              Cytokine
## drug                   IL-4          IL-6 Interferon gamma    Resiquimod
##   BAY-11-7085  2.235761e-02  4.554573e-02     5.434015e-02 -3.639915e-01
##   Everolimus   1.332949e-01  6.817609e-03     5.469766e-02  2.765104e-02
##   Fludarabine  3.953363e-01 -2.469675e-02     4.886592e-02  2.265297e-01
##   I-BET 762   -2.665207e-02 -2.262955e-02     1.357931e-02 -3.265356e-01
##   Ibrutinib    1.836975e-01  6.642272e-02     1.101352e-01 -2.509692e-01
##   Idelalisib   1.657969e-01  6.363235e-02     1.171099e-01 -2.064261e-01
##   Luminespib  -4.429811e-02 -3.220023e-03    -2.221338e-02 -7.116024e-02
##   Nutlin-3a    5.337703e-01  1.136707e-01     1.433726e-01  1.426722e-01
##   PRT062607    1.388865e-01 -1.097500e-02     2.337567e-02 -3.612143e-01
##   Pyridone-6  -2.305619e-02  6.847247e-02     6.271102e-02  1.349796e-03
##   Ralimetinib  7.075108e-02  2.282944e-03     2.190150e-01 -1.118603e-01
##   Selumetinib  1.174186e-01  6.322721e-02     8.349273e-02 -1.148979e-01
##              Cytokine
## drug                 sCD40L   sCD40L+IL-4        SDF-1a soluble anti-IgM
##   BAY-11-7085  2.049505e-02 -1.619583e-01  2.831627e-02    -6.597054e-03
##   Everolimus   5.178276e-02  1.386887e-01 -2.677009e-03     5.541420e-04
##   Fludarabine  1.020548e-02  3.687631e-01  5.075778e-02     3.435734e-02
##   I-BET 762    4.068479e-02  6.576262e-03 -4.944505e-03    -2.502231e-02
##   Ibrutinib   -4.740237e-03  8.873182e-02  2.038604e-02     1.260542e-02
##   Idelalisib   2.027841e-02  8.430112e-02 -2.252409e-05     2.338468e-02
##   Luminespib  -1.543305e-01 -3.482382e-01 -8.004006e-02    -4.275447e-01
##   Nutlin-3a    1.834396e-02  4.643749e-01  1.258999e-01     5.912671e-03
##   PRT062607   -8.838768e-02  1.457566e-01 -2.877775e-02    -6.326385e-02
##   Pyridone-6   3.934846e-02 -1.855970e-01  3.145554e-02    -6.082148e-03
##   Ralimetinib -1.691833e-04 -3.878371e-02  1.575747e-03    -2.273022e-02
##   Selumetinib  5.122307e-02  4.874905e-02 -2.510831e-02     2.421239e-02
##              Cytokine
## drug                 TGF-b1
##   BAY-11-7085 -4.509014e-03
##   Everolimus  -8.310092e-03
##   Fludarabine -4.636054e-02
##   I-BET 762   -9.396045e-02
##   Ibrutinib    1.958423e-02
##   Idelalisib   9.050594e-03
##   Luminespib  -8.478118e-02
##   Nutlin-3a   -1.529464e-02
##   PRT062607    2.369375e-03
##   Pyridone-6   3.081690e-02
##   Ralimetinib  1.357279e-02
##   Selumetinib  2.234705e-02
bvalmat[pvalmat >= a] = 0

#transform matrix 
bvalmat <- t(bvalmat)

tree <- 
  pheatmap(bvalmat, silent = TRUE)


cyt_order <- rownames(bvalmat[tree$tree_row[["order"]],])

drug_order <- colnames(bvalmat[,tree$tree_col[["order"]]])


Fig5C <-
  #append p values
  left_join(betadf, pvaldf, by = c("drug", "Cytokine")) %>% 
  #make beta value 0 if p value is < sig
  mutate(betavalue = ifelse(pvalue<a, betavalue, 0)) %>% 
  #put drugs in order of clustered heatmap
  mutate(drug=factor(drug, levels = drug_order)) %>% 
  
  #put stimuli in order of clustered heatmap
  mutate(Cytokine=factor(Cytokine, levels = rev(cyt_order))) %>% 
  
  #make heatmap with ggplot
  ggplot(aes(x=drug, y=Cytokine))+
  
  geom_tile(aes(fill=betavalue),color = "grey")+
  #set colour scale
  scale_fill_gradientn(colors=c("#003DA5",  "white",  "#A6093D"), limits=c(-0.6,0.6))+
  #set theme
  t2+
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.ticks = element_blank(),
        axis.ticks.x =element_blank(),
        panel.background = element_blank(), 
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        legend.key.height=unit(2.5, "cm"))+
  labs(fill = expression(paste(beta["int"], "-value")))+
  #add category
  geom_text(data = fit_combinations, aes(x=Drug, label=Category_Symbol) )+
  scale_y_discrete(labels=c("TGF-b1"="TGF-\u03B21", "sCD40L+IL-4"="sCD40L + IL4", "IL-1b"="IL1\u03B2", "IL-4"="IL4", "IL-6"="IL6","IL-15"="IL15","IL-10"="IL10", "IL-21"="IL21","IL-2"="IL2", "Interferon gamma"= "Interferon \u03B3", "SDF-1a"="SDF-1\u03B1"))
  
Fig5C

32.4 Fig 5D - I

Preparation: Line plots of examples of drug and stimulus combinations that show an interaction

#get lists of drugs and cytokines without baseline treatments
thedrugs.only <- thedrugs %>% setdiff("DMSO") %>% sort()
thecytokines.only <- thecytokines %>% setdiff("No Cytokine")%>% sort()

# get a list of interactions for which p value of beta interaction is significant
sign_conditions <-
  pvaldf %>% 
  dplyr::filter(pvalue<=0.05) %>% 
  mutate(condition = paste0("treatment_drug:", 
                            drug, 
                            ":treatment_cytokine:",
                            Cytokine))


gg <- vector(mode="list", length=length(sign_conditions$condition))
names(gg) <- sign_conditions$condition



plotList <- 
lapply(names(gg), function(i){
  
  #preparations for the interaction plot
  drug <- dplyr::filter(sign_conditions, condition ==i)$drug
  drugTreat <- paste("treatment_drug", drug, sep='')
      
  cyt <- dplyr::filter(sign_conditions, condition ==i)$Cytokine
  cytokineTreat <-paste("treatment_cytokine", cyt, sep='')
      
      
  Category <- fit_combinations[which(fit_combinations$Drug==drug &
                               fit_combinations$Cytokine==cyt),]$Category
  
  #extract coefficients to plot

  ## baseline effect
  baseline <- fit$coefficients["(Intercept)"]

  ## single treatment effect for type_1 = "a"
  single_1 <- fit$coefficients[drugTreat]

    ## single treatment effect for type_2 = "b"
    single_2 <- fit$coefficients[cytokineTreat]

    # interaction Viability
    interaction <- fit$coefficients[paste(drugTreat, cytokineTreat, sep=':')]

      
    ## observed single treatment values
    observed_single_1 <- baseline + single_1 
    observed_single_2 <- baseline + single_2

    ## observed double treatment value
    observed_double <- baseline + single_1 + single_2 + interaction

    ## predicted double treatment value (based only on single effects)
    predicted_double <- baseline + single_1 + single_2

    #prepare labels and aesthetics for plotting
    # y_label <- case_when(cyt=="Interferon gamma"~"Interferon\ngamma",
    #                      cyt=="soluble anti-IgM"~"soluble\nanti-IgM",
    #                      TRUE~cyt)

    xlabels <- c("DMSO", drug, 
                 case_when(cyt=="IL-4"~"IL4",
                           cyt=="Interferon gamma"~"Interferon \u03B3",
                           cyt=="sCD40L+IL-4"~"sCD40L + IL4",
                           cyt=="soluble anti-IgM"~"soluble\nanti-IgM",
                           TRUE~cyt),
                paste(drug, " +\n",
                       case_when(cyt=="IL-4"~"IL4",
                           cyt=="Interferon gamma"~"Interferon \u03B3",
                           cyt=="sCD40L+IL-4"~"sCD40L + IL4",
                           TRUE~cyt),
                sep=""))
      
    segment_size <- 2
    
    
    plotTab <-
      df %>%
      
      dplyr::filter(treatment_drug %in% c("DMSO" , drug)) %>%

      dplyr::filter(treatment_cytokine %in% c("No Cytokine", cyt)) %>%

      dplyr::mutate(treatment_combination = 
                  #if baseline treatment
                  ifelse(treatment_drug=="DMSO" & 
                         treatment_cytokine=="No Cytokine",
                         "DMSO",
                         #if drug only treatment
                         ifelse(treatment_drug == drug & 
                                treatment_cytokine =="No Cytokine",
                                paste0("Drug=", drug),
                                #if cytokine only treatment
                                ifelse(treatment_drug == "DMSO" & 
                                       treatment_cytokine == cyt,
                                       paste0("Cytokine=", cyt),
                                       #otherwise combinatorial treatment
                                       paste0("Drug=", drug, "\nCytokine=", cyt))))) %>%
  #make treatment_combination as a factor
  mutate(treatment_combination = factor(treatment_combination, 
                                        levels=c("DMSO", 
                                                 paste0("Drug=", drug),
                                                 paste0("Cytokine=", cyt),
                                                 paste0("Drug=", drug, "\nCytokine=", cyt)
                                                 )))
  #make plot
        
        

  ggplot(plotTab, aes(treatment_combination, Viability, group=PatientID)) +
  
  geom_hline(yintercept = 0, linetype=2, color="black")+

  lemon::geom_pointline( size=1,   
                         colour=ifelse(interaction > 0,
                                       "#A6093D","#003DA5"), alpha=0.3) +
  #add predicted viability with control treatment
  geom_segment( size=segment_size,
                aes(x=1-.2, xend=1+.2, y=baseline, yend=baseline),
                colour = "black") +
  #add predicted viability with drug only treatment
  geom_segment( size=segment_size,
                aes(x=2-.2, xend=2+.2, y=observed_single_1,yend=observed_single_1),
                colour="black") +
  #add predicted viability with cytokine only treatment
  geom_segment( size=segment_size,
                aes(x=3-.2, xend=3+.2, y=observed_single_2, yend=observed_single_2),
                colour="black") +
  #add predicted viability with both treatments
  geom_segment( size=segment_size,
                aes(x=4-.2, xend=4+.2, y=observed_double, yend=observed_double),
                colour="black") +
  #add predicted viability with both treatments, without interaction
  geom_segment( size=segment_size,
                aes(x=4-.2, xend=4+.2, y=predicted_double, 
                    yend=predicted_double), color="blue") + 
 
   #add category and combination as title
    
    
    
  ggtitle(paste( Category,": ", drug, " & ", 
                 case_when(cyt=="IL-4"~"IL4",
                           cyt=="Interferon gamma"~"Interferon \u03B3",
                           cyt=="sCD40L+IL-4"~"sCD40L + IL4",
                           TRUE~cyt), sep="")) + 
  
  #add theme
  t2 + theme(axis.text.x = element_text(size=fontsize+5)) +
    theme(plot.tag=element_text(size = 30))+
  scale_x_discrete(labels = xlabels) +
  ylab("Log (Viability)") + 
  xlab("")


  } ) 

names(plotList) <- names(gg)

32.5 Fig 5D

wrap_plots(plotList$`treatment_drug:Ibrutinib:treatment_cytokine:IL-4`)

32.6 Fig 5E

wrap_plots(plotList$`treatment_drug:Ibrutinib:treatment_cytokine:Interferon gamma`)

32.7 Fig 5F

wrap_plots(plotList$`treatment_drug:Pyridone-6:treatment_cytokine:sCD40L+IL-4`)

32.8 Fig 5G

wrap_plots(plotList$`treatment_drug:Ralimetinib:treatment_cytokine:Interferon gamma`)

32.9 Fig 5H

wrap_plots(plotList$`treatment_drug:Ibrutinib:treatment_cytokine:CpG ODN`)

32.10 Fig 5G

wrap_plots(plotList$`treatment_drug:Luminespib:treatment_cytokine:soluble anti-IgM`)

33 Arrange plots

design1 <-"
  1223
  4455
  6677
  8899
  "

Fig5A + theme(plot.tag=element_text(size = 30)) +

wrap_elements(Fig5B) + theme(plot.tag=element_text(size = 30)) +
  
wrap_elements(Fig5C) + theme(plot.tag=element_text(size = 30)) +
  
wrap_plots(plotList$`treatment_drug:Ibrutinib:treatment_cytokine:IL-4`)+
  
wrap_plots(plotList$`treatment_drug:Ibrutinib:treatment_cytokine:Interferon gamma`)+
  
wrap_plots(plotList$`treatment_drug:Pyridone-6:treatment_cytokine:sCD40L+IL-4`)+
  
wrap_plots(plotList$`treatment_drug:Ralimetinib:treatment_cytokine:Interferon gamma`)+
  
wrap_plots(plotList$`treatment_drug:Ibrutinib:treatment_cytokine:CpG ODN`)+
  
wrap_plots(plotList$`treatment_drug:Luminespib:treatment_cytokine:soluble anti-IgM`)+
  
plot_layout(design=design1,
            heights = c(1,0.5,0.5,0.5),
            widths = c(0.92,0.08,0.05,0.95)) +

  plot_annotation( tag_levels = "A", theme=theme(plot.title = element_text(face="plain", size=28, hjust = 0.5)))

34 Appendix

Sys.info()
##                                                                                             sysname 
##                                                                                            "Darwin" 
##                                                                                             release 
##                                                                                            "19.6.0" 
##                                                                                             version 
## "Darwin Kernel Version 19.6.0: Thu May  6 00:48:39 PDT 2021; root:xnu-6153.141.33~1/RELEASE_X86_64" 
##                                                                                            nodename 
##                                               "mac-huber20.academic.christs.cam.ac.uk.pool.embl.de" 
##                                                                                             machine 
##                                                                                            "x86_64" 
##                                                                                               login 
##                                                                                             "holly" 
##                                                                                                user 
##                                                                                             "holly" 
##                                                                                      effective_user 
##                                                                                             "holly"
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
##  [1] grid      parallel  stats4    stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] forcats_0.5.1                          
##  [2] stringr_1.4.0                          
##  [3] dplyr_1.0.7                            
##  [4] purrr_0.3.4                            
##  [5] readr_1.4.0                            
##  [6] tibble_3.1.2                           
##  [7] tidyverse_1.3.1                        
##  [8] tidyr_1.1.3                            
##  [9] rlang_0.4.11                           
## [10] maxstat_0.7-25                         
## [11] data.table_1.14.0                      
## [12] png_0.1-7                              
## [13] formattable_0.2.1                      
## [14] broom_0.7.8                            
## [15] scales_1.1.1                           
## [16] gtable_0.3.0                           
## [17] genomation_1.24.0                      
## [18] ChIPseeker_1.28.3                      
## [19] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2
## [20] GenomicFeatures_1.44.0                 
## [21] msigdbr_7.4.1                          
## [22] org.Hs.eg.db_3.13.0                    
## [23] AnnotationDbi_1.54.1                   
## [24] clusterProfiler_4.0.0                  
## [25] ConsensusClusterPlus_1.56.0            
## [26] glmnet_4.1-2                           
## [27] Matrix_1.3-4                           
## [28] survminer_0.4.9                        
## [29] ggpubr_0.4.0                           
## [30] DESeq2_1.32.0                          
## [31] SummarizedExperiment_1.22.0            
## [32] Biobase_2.52.0                         
## [33] MatrixGenerics_1.4.0                   
## [34] matrixStats_0.59.0                     
## [35] GenomicRanges_1.44.0                   
## [36] GenomeInfoDb_1.28.1                    
## [37] IRanges_2.26.0                         
## [38] S4Vectors_0.30.0                       
## [39] BiocGenerics_0.38.0                    
## [40] ggfortify_0.4.11                       
## [41] pheatmap_1.0.12                        
## [42] magrittr_2.0.1                         
## [43] ggbeeswarm_0.6.0                       
## [44] ggrepel_0.9.1                          
## [45] RColorBrewer_1.1-2                     
## [46] cowplot_1.1.1                          
## [47] patchwork_1.1.1                        
## [48] magick_2.7.2                           
## [49] gridExtra_2.3                          
## [50] Hmisc_4.5-0                            
## [51] ggplot2_3.3.5                          
## [52] Formula_1.2-4                          
## [53] survival_3.2-11                        
## [54] lattice_0.20-44                        
## [55] plyr_1.8.6                             
## [56] BiocStyle_2.20.2                       
## 
## loaded via a namespace (and not attached):
##   [1] rappdirs_0.3.3           rtracklayer_1.52.0       exactRankTests_0.8-32   
##   [4] bit64_4.0.5              knitr_1.33               DelayedArray_0.18.0     
##   [7] rpart_4.1-15             KEGGREST_1.32.0          RCurl_1.98-1.3          
##  [10] generics_0.1.0           RSQLite_2.2.7            shadowtext_0.0.8        
##  [13] bit_4.0.4                enrichplot_1.12.2        lubridate_1.7.10        
##  [16] xml2_1.3.2               assertthat_0.2.1         viridis_0.6.1           
##  [19] xfun_0.24                hms_1.1.0                babelgene_21.4          
##  [22] evaluate_0.14            fansi_0.5.0              restfulr_0.0.13         
##  [25] progress_1.2.2           caTools_1.18.2           dbplyr_2.1.1            
##  [28] readxl_1.3.1             km.ci_0.5-2              igraph_1.2.6            
##  [31] DBI_1.1.1                geneplotter_1.70.0       htmlwidgets_1.5.3       
##  [34] ellipsis_0.3.2           crosstalk_1.1.1          backports_1.2.1         
##  [37] bookdown_0.22.3          markdown_1.1             annotate_1.70.0         
##  [40] gridBase_0.4-7           biomaRt_2.48.2           vctrs_0.3.8             
##  [43] abind_1.4-5              cachem_1.0.5             withr_2.4.2             
##  [46] ggforce_0.3.3            BSgenome_1.60.0          checkmate_2.0.0         
##  [49] GenomicAlignments_1.28.0 treeio_1.16.1            prettyunits_1.1.1       
##  [52] cluster_2.1.2            DOSE_3.18.1              mltools_0.3.5           
##  [55] ape_5.5                  lazyeval_0.2.2           crayon_1.4.1            
##  [58] genefilter_1.74.0        labeling_0.4.2           pkgconfig_2.0.3         
##  [61] tweenr_1.0.2             nlme_3.1-152             vipor_0.4.5             
##  [64] nnet_7.3-16              lifecycle_1.0.0          downloader_0.4          
##  [67] filelock_1.0.2           BiocFileCache_2.0.0      modelr_0.1.8            
##  [70] seqPattern_1.24.0        cellranger_1.1.0         polyclip_1.10-0         
##  [73] aplot_0.0.6              KMsurv_0.1-5             carData_3.0-4           
##  [76] boot_1.3-28              zoo_1.8-9                reprex_2.0.0            
##  [79] base64enc_0.1-3          beeswarm_0.4.0           viridisLite_0.4.0       
##  [82] rjson_0.2.20             bitops_1.0-7             KernSmooth_2.23-20      
##  [85] Biostrings_2.60.1        blob_1.2.1               shape_1.4.6             
##  [88] pdftools_3.0.1           qvalue_2.24.0            qpdf_1.1                
##  [91] jpeg_0.1-8.1             rstatix_0.7.0            ggsignif_0.6.2          
##  [94] memoise_2.0.0            gplots_3.1.1             zlibbioc_1.38.0         
##  [97] compiler_4.1.0           scatterpie_0.1.6         BiocIO_1.2.0            
## [100] plotrix_3.8-1            cli_3.0.0                Rsamtools_2.8.0         
## [103] XVector_0.32.0           htmlTable_2.2.1          MASS_7.3-54             
## [106] tidyselect_1.1.1         stringi_1.6.2            highr_0.9               
## [109] yaml_2.2.1               GOSemSim_2.18.0          askpass_1.1             
## [112] locfit_1.5-9.4           latticeExtra_0.6-29      survMisc_0.5.5          
## [115] fastmatch_1.1-0          tools_4.1.0              rio_0.5.27              
## [118] rstudioapi_0.13          foreach_1.5.1            foreign_0.8-81          
## [121] farver_2.1.0             ggraph_2.0.5             digest_0.6.27           
## [124] rvcheck_0.1.8            BiocManager_1.30.16      ggtext_0.1.1            
## [127] gridtext_0.1.4           Rcpp_1.0.6               car_3.0-11              
## [130] httr_1.4.2               colorspace_2.0-2         rvest_1.0.0             
## [133] fs_1.5.0                 XML_3.99-0.6             splines_4.1.0           
## [136] tidytree_0.3.4           graphlayouts_0.7.1       xtable_1.8-4            
## [139] jsonlite_1.7.2           ggtree_3.0.2             tidygraph_1.2.0         
## [142] lemon_0.4.5              R6_2.5.0                 pillar_1.6.1            
## [145] htmltools_0.5.1.1        DT_0.18                  glue_1.4.2              
## [148] fastmap_1.1.0            BiocParallel_1.26.1      codetools_0.2-18        
## [151] fgsea_1.18.0             mvtnorm_1.1-2            utf8_1.2.1              
## [154] curl_4.3.2               gtools_3.9.2             zip_2.2.0               
## [157] GO.db_3.13.0             openxlsx_4.2.4           rmarkdown_2.9           
## [160] munsell_0.5.0            DO.db_2.9                GenomeInfoDbData_1.2.6  
## [163] iterators_1.0.13         impute_1.66.0            haven_2.4.1             
## [166] reshape2_1.4.4

In this sub-vignette we present the analysis and source code for figure 6.This sub-vignette can be built along with all other sub-vignettes by running CLLCytokineScreen2021.Rmd.

35 Set up

set.seed(1996)

Load libraries

library(plyr)
library(dplyr)
library(ggpubr)
library(ggbeeswarm)
library(ggplot2)
library(data.table)
library(magrittr)
library(pheatmap)
library(gtable)
library(glmnet)
library(RColorBrewer)
library(gridExtra)
library(patchwork)
library(cowplot)
library(tidyverse)

Set plot directory

plotDir = ifelse(exists(".standalone"), "", "../../inst/figs/")
if(plotDir!="") if(!file.exists(plotDir)) dir.create(plotDir)

36 Load data

Load raw files

#df: tibblecontaining all screening viability data
load( "../../data/df.RData")

#patMeta: tibble containing all patient genetic data
load( "../../data/patMeta.RData")
#From tsvs
#df: tibble containing all screening data
df <- read.table(file= "../../inst/extdata/df.txt",header = TRUE) %>% as_tibble()

#patMeta: tibble containing genetic data for patient samples in screen 
patMeta <- read.table(file= "../../inst/extdata/patMeta.txt",header = TRUE) %>% as_tibble()



#Arrange data for analysis
#screening data
df$Cytokine <- factor(df$Cytokine, levels= c("No Cytokine",
                                              "Resiquimod", 
                                              "IL-4", 
                                              "TGF-b1",
                                              "IL-1b",
                                              "Interferon gamma",
                                              "SDF-1a",
                                              "sCD40L" ,
                                              "sCD40L+IL-4",
                                              "soluble anti-IgM", 
                                              "CpG ODN",
                                              "IL-6",
                                              "IL-10",
                                              "IL-21",
                                              "HS-5 CM", 
                                              "IL-15",
                                              "BAFF",
                                              "IL-2" ))



df$Drug <- factor(df$Drug, levels =c("DMSO",
                                     "BAY-11-7085",
                                     "Everolimus",
                                     "Fludarabine",
                                     "I-BET 762",
                                     "Ibrutinib","Idelalisib",
                                     "Luminespib",
                                     "Nutlin-3a",
                                     "PRT062607",
                                     "Pyridone-6",
                                     "Ralimetinib",
                                     "Selumetinib"))


#patMeta
patMeta %>% mutate_at(vars(-PatientID), as.factor)

Process data

#only high concentrations of drugs
df <- 
  dplyr::filter(df, Drug_Concentration %in% c("None","High")) %>% 
  dplyr::select(PatientID, Drug, Cytokine, DCK, Log)

#rename columns
df <- 
  plyr::rename(df, 
               c("Drug"="treatment_drug", 
                 "Cytokine"="treatment_cytokine", 
                 "Log"="Viability"))

Define lists

#lists for drug and cytokine treatments
thecytokines <- unique(df$treatment_cytokine) %>% setdiff("No Cytokine")
thedrugs <- unique(df$treatment_drug) %>% setdiff("DMSO")

37 Define aesthetics

source("../../R/themes_colors.R")

38 Define Functions

Run Glmnet function: This function takes a feature matrix X of patient genetic features and a continuous response matrix y, containing the interaction coefficients for a given drug : stimulus combination, for each patient. The function runs lasso or ridge regularised regression, in order to identify genetic predictors of the size of an interaction between a drug and stimulus. The function will run the cv.glmnet function for the chosen number of repeats, applying cross-fold validation using chosen number of folds.

#Function for multi-variant regression
runGlm <- function(X, y, method = "lasso", repeats = 20, folds = 3) {
  #set up objects to store results
  modelList <- list()
  lambdaList <- c()
  varExplain <- c()
  coefMat <- matrix(NA, ncol(X), repeats)
  rownames(coefMat) <- colnames(X)

  #set alpha
  if (method == "lasso"){
    alpha = 1
  } else if (method == "ridge") {
    alpha = 0
  }
  
  #for the set number of repeats, run the regression
  for (i in seq(repeats)) {
    if (ncol(X) > 2) {
      #run cross validated generalised linear model with given parameters
      res <- cv.glmnet(X,y, type.measure = "mse", family="gaussian", 
                       nfolds = folds, alpha = alpha, standardize = FALSE)
      
      #store lamdas and min lambda value
      lambdaList <- c(lambdaList, res$lambda.min)
      
      #store result of cv.glmnet
      modelList[[i]] <- res
      
      #get coefficents with min lambda value 
      coefModel <- coef(res, s = "lambda.min")[-1] #remove intercept row
      
      #store coefficients for this repeat
      coefMat[,i] <- coefModel
      
      #calculate variance explained
      if(sum(coefModel !=0)){
      y.pred <- predict(res, s = "lambda.min", newx = X)
      #if there are no predictors, all y.pred will be the same so its not possible to calculate variance explained because the SD is 0
      varExp <- cor(as.vector(y),as.vector(y.pred))^2
      }else{ varExp <- NA}
      varExplain[i] <- ifelse(is.na(varExp), 0, varExp) 
      
     
      
    } else {
      fitlm<-lm(y~., data.frame(X))
      varExp <- summary(fitlm)$r.squared
      varExplain <- c(varExplain, varExp)
      
    }

  }
  #store all results 
  list(modelList = modelList, lambdaList = lambdaList, varExplain = varExplain, coefMat = coefMat)
}

lassoPlot To generate predictor profiles to display outputs from runGlm function. Function takes lassoOut, a list of objects that is generated by running runGlm, plus the feature and response matrices (geneMatrix and betaMatrix), plus specifications for freqCut, the proportion of bootstrapped repeats that a coefficient should be significant for it to be included in the predictor plot, and coefCut , the minimum value of a coefficient for it to be included in the plot.

lassoPlot <- function(lassoOut, geneMatrix, betaMatrix, freqCut = 1, coefCut = 0.01) {
  #object to hold all plots
  plotList <- list()
  
  #for each drug - stimuli combination, run the following:
  for (seaName in names(lassoOut)) {
    ###FOR THE BAR PLOT
    #extract mean coefficients for each drug - stimuli combination
    barValue <- rowMeans(lassoOut[[seaName]]$coefMat)
    #extract proportion of repeats for which each coefficient is significant
    freqValue <- rowMeans(abs(sign(lassoOut[[seaName]]$coefMat)))
    #filter out coefficients that don't meet freqCut and coefCut thresholds
    barValue <- barValue[abs(barValue) >= coefCut & freqValue >= freqCut] 
    #arrange the bar values in numerical order
    barValue <- barValue[order(barValue)]
    #if there are no sig coefficients, don't plot
    if(length(barValue) == 0) {
      plotList[[seaName]] <- NA
      next
    }
    
   
    ###FOR THE HEATMAP AND SCATTER PLOT
    #get feature matrix and response matrix to plot
    allData <- geneMatrix
    betaValue <- unlist(betaMatrix[seaName,])
    
    #get feature matrix for features with significant coefficients only
    tabValue <- allData[, names(barValue),drop=FALSE]
    ord <- order(betaValue)
    betaValue <- betaValue[ord]
    tabValue <- tabValue[ord, ,drop=FALSE]
    sampleIDs <- rownames(tabValue)
    tabValue <- as_tibble(tabValue)
    tabValue$Sample <- sampleIDs
    
    #annotate features as mutations, methylation cluster or IGHV, and apply different scaling     so that different colours can be used in plotting 
    
    #for mutations:
    matValue <- gather(tabValue, key = "Var",value = "Value", -Sample)
    matValue$Type <- "mut"
    
    #for methylation cluster
    matValue$Type[grep("Methylation",matValue$Var)] <- "meth"
    
    #for IGHV status
    matValue$Type[grep("IGHV.status",matValue$Var)] <- "ighv"
    
    #change the scale of the value so that IGHV, Methylation and Mutation do not overlap
    matValue[matValue$Type == "mut",]$Value = matValue[matValue$Type == "mut",]$Value + 10
    matValue[matValue$Type == "meth",]$Value = matValue[matValue$Type == "meth",]$Value + 20
    matValue[matValue$Type == "ighv",]$Value = matValue[matValue$Type == "ighv",]$Value + 30
    
    #change continous to catagorical
    matValue$Value <- factor(matValue$Value,levels = sort(unique(matValue$Value)))
    
    #arrange order of feature rows and columns in heatmap
    #heatmap rows should align with order of genetic coefficients
    matValue$Var <- factor(matValue$Var, levels = names(barValue))
    
    #sample columns should be ascending order according to value of coefficient of each patient, so that heatmap aligns with scatter plot below
    matValue$Sample <- factor(matValue$Sample, levels = names(betaValue))
    
    #update labels, keep factor levels
    matValue$Var <- revalue(matValue$Var, c("IGHV.status" = "IGHV status", "del11q" = "del(11q)", "del13q" = "del(13q)", "del17p" = "del(17p)", "trisomy12" = "trisomy 12")) 
    
    #MAKE PLOTS
    #plot the heatmap of genetic feature values
    p1 <- ggplot(matValue, aes(x=Sample, y=Var)) + 
      geom_tile(aes(fill=Value), color = "white") + #ghost white
      theme_bw()+
      scale_y_discrete(expand=c(0,0)) + 
      theme(axis.title.x = element_text( size=fontsize+4),
            axis.text.y=element_text(hjust=0, size=18, face="bold"), 
            axis.ticks=element_blank(),
            panel.border=element_rect(colour="gainsboro"),  
            plot.title=element_text(face="bold", size = 18, margin = margin(t = -5, b = 1)), 
            panel.background=element_blank(),
            panel.grid.major=element_blank(), 
            panel.grid.minor=element_blank()) + 
      xlab("Mutation status for each patient") + 
      ylab("") + 
      scale_fill_manual(name="Mutated", 
                              values=c(`10`= offwhite,  #WT
                                       `11`="#373A36", #Mutant
                                       `20`= offwhite, #LP
                                       `20.5`= "#707372", #IP
                                       `21` = "#A8A99E", #HP
                                       `30` = offwhite, #IGHV-U
                                       `31` = "#707372"), #IGHV-M
                                       guide="none") + 
            ggtitle(seaName)
    
    
    #Plot the bar plot on the left of the heatmap 
    barDF = data.frame(barValue, nm=factor(names(barValue),levels=names(barValue)))
    
    p2 <- ggplot(data=barDF, aes(x=nm, y=barValue)) + 
      geom_bar(stat="identity", 
               fill=ifelse(barValue<0,
                           palblues[6],palreds[8]), 
               colour="black", 
               size=0.3) +
      scale_x_discrete(expand=c(0,0.5))+ 
      scale_y_continuous(expand=c(0,0), n.breaks = 4)+ 
      coord_flip(ylim=c(-0.3,0.35))+ 
      theme(panel.grid.major=element_blank(), 
            panel.background=element_blank(), 
            axis.ticks.y = element_blank(),
            panel.grid.minor = element_blank(), 
            axis.text=element_text(size=fontsize, angle = 0, hjust = 0),
                axis.title = element_text(size=fontsize+4), 
            panel.border=element_blank()) +
      ylab("Size of coefficient") + 
      geom_vline(xintercept=c(0.5), 
                 color="black", 
                 size=0.6)
    
    #Plot the scatter plot of patient coefficient values under the heatmap
    scatterDF = data.frame(X=factor(names(betaValue), 
                                    levels=names(betaValue)), 
                           Y=unlist(betaValue))
    
    p3 <- ggplot(scatterDF, aes(x=X, y=Y)) + 
          geom_point(shape=21, 
                     fill="dimgrey", 
                     colour="#707372", #dark grey
                     size=1.2) + 
          theme_bw() +
          theme(panel.grid.minor=element_blank(), 
                panel.grid.major.x=element_blank(), 
                axis.ticks.x=element_blank(), 
                axis.text.y=element_text(size=fontsize),
                axis.title = element_text(size=fontsize+4),
                panel.border=element_rect(colour="dimgrey", size=0.1),
                panel.background=element_rect(fill="white")) +
  xlab(expression(paste("Patient-specific ", beta["int"])))
    
    
    #Assemble all the plots together

    # construct the gtable
    wdths = c(0.2, 1.5, 0.4, 1.3*ncol(matValue), 1.7, 0.2)
    hghts = c(0.3, 0.3, 0.0020*nrow(matValue), 0.2, 0.8, 0.3)*1.5
    gt = gtable(widths=unit(wdths, "in"), heights=unit(hghts, "in"))
    
    ## make grobs
    gg1 = ggplotGrob(p1)
    gg2 = ggplotGrob(p2)
    gg3 = ggplotGrob(p3)

    ## fill in the gtable
   
    #HEATMAP
    #5:1 = "PREDICTORS"
    gt = gtable_add_grob(gt, gtable_filter(gg1, "panel"), 3, 4) # add heatmap
    gt = gtable_add_grob(gt, gtable_filter(gg1, "panel"), 3, 4) #add legend
    gt = gtable_add_grob(gt, gtable_filter(gg1, "title"), 1, 4) #add title to plot
    gt = gtable_add_grob(gt, gtable_filter(gg1, "axis-l"), 3, 5) # variable names
    gt = gtable_add_grob(gt, gtable_filter(gg1, "xlab-b"), 2, 4) # axis title
    
    #BARPLOT
    gt = gtable_add_grob(gt, gtable_filter(gg2, "panel"), 3, 2) # add barplot
    gt = gtable_add_grob(gt, gtable_filter(gg2, "axis-b"), 4, 2) # y axis for barplot
    gt = gtable_add_grob(gt, gtable_filter(gg2, "xlab-b"), 2, 2) # y lab for barplot

    
    #SCATTER PLOT
    gt = gtable_add_grob(gt, gtable_filter(gg3, "panel"), 5, 4) # add scatterplot
    gt = gtable_add_grob(gt, gtable_filter(gg3, "xlab-b"), 6, 4) # x label for scatter plot
    gt = gtable_add_grob(gt, gtable_filter(gg3, "axis-l"), 5, 3) #  axis for scatter plot
    
   

    
    #plot
    plotList[[seaName]] <- gt
  }
  return(plotList)
}

makelegends: to make legends for predictor profile plot. AcceptslegendFor, a vector of names of what the legend should show (I (IGHV status), M (Methylation Cluster), G (Gene Mutation)), and colors, a vector of colours corresponding to the elements of legendFor.

makelegends <- function (legendFor, colors) 
{
    x = NULL
    y = NULL
    colors = colors[names(colors) %in% legendFor]
    nleg = length(colors)
    
    #edit these widths to change alignment with lasso plots in patchwork code
    wdths = c(0.4,2,2,2,1.5)
    hghts = c(2)
    
    gtl = gtable(widths=unit(wdths, "in"), heights=unit(hghts, "in"))
    n = 2
    if ("M" %in% names(colors)) {
        Mgg = ggplot(data = data.frame(x = 1, 
                                       y = factor(c("LP", "IP", "HP"), 
                                                  levels = c("LP", "IP", "HP"))), 
                     aes(x = x, y = y, fill = y)) + 
              geom_tile() + 
              scale_fill_manual(name = "Methylation cluster", 
                                values = setNames(colors[["M"]], 
                                                  nm = c("LP", "IP","HP"))) + 
              theme(legend.title = element_text(size = 12), 
                    legend.text = element_text(size = 12))
        
        gtl = gtable_add_grob(gtl, gtable_filter(ggplotGrob(Mgg), "guide-box"), 1, n)
        n = n + 1
    }
    
    if ("I" %in% names(colors)) {
        Igg = ggplot(data = data.frame(x = 1, y = factor(c("Unmutated", 
            "Mutated"), levels = c("Unmutated", "Mutated"))), 
            aes(x = x, y = y, fill = y)) + geom_tile() + scale_fill_manual(name = "IGHV", 
            values = setNames(colors[["I"]], nm = c("Unmutated", "Mutated"))) + 
            theme(legend.title = element_text(size = 12), 
                  legend.text = element_text(size = 12))
        gtl = gtable_add_grob(gtl, gtable_filter(ggplotGrob(Igg), 
            "guide-box"), 1, n)
        n = n + 1
    }
    
    if ("G" %in% names(colors)) {
        Ggg = ggplot(data = data.frame(x = 1, y = factor(c("Wild Type", 
            "Mutated"), levels = c("Wild Type", "Mutated"))), 
            aes(x = x, y = y, fill = y)) + geom_tile() + scale_fill_manual(name = "Gene", 
            values = setNames(colors[["G"]], nm = c("Wild Type", "Mutated"))) + 
            theme(legend.title = element_text(size = 12), 
                  legend.text = element_text(size = 12))
        gtl = gtable_add_grob(gtl, gtable_filter(ggplotGrob(Ggg), 
            "guide-box"), 1, n)
        n = n + 1
    }
    
    return(list(plot = gtl, width = sum(wdths), height = sum(hghts)))
}

39 Prework

39.1 Define Feature matrix

#select features from patient meta file
geneMatrix <- dplyr::select(patMeta, -c(gender:treatment, Methylation_Cluster)) %>%

  
  #adjust IGHV levels to 1 and 0 
  mutate(IGHV.status = ifelse(is.na(IGHV.status), NA, 
                              ifelse(IGHV.status == "M", 1, 0))) %>% 
  
    #change factors to characters and then to numeric  
    mutate_if(is.factor, as.character) %>% 
  
    mutate_at(vars(-PatientID), as.numeric) %>%
  
  #convert to matrix format
  data.frame() %>% 
  column_to_rownames("PatientID") %>% 
  as.matrix()


#Remove genes with higher than 20% missing values
geneMatrix <- geneMatrix[,colSums(is.na(geneMatrix))/nrow(geneMatrix) <= 0.2]

#Filter for patients with complete data
geneMatrix.complete <- geneMatrix[complete.cases(geneMatrix),]
nrow(geneMatrix.complete)
## [1] 137
#Combine KRAS, NRAS and BRAF mutations into a single column
##Add Ras_raf column
Ras_Raf <- matrix(NA, nrow = nrow(geneMatrix.complete), ncol = 1)
colnames(Ras_Raf) <- "KRAS,\nNRAS,\nBRAF"
geneMatrix.complete <- cbind(geneMatrix.complete, Ras_Raf)

#Add a 1 where any of KRAS, NRAS or BRAF are mutated
geneMatrix.complete[,"KRAS,\nNRAS,\nBRAF"] <- ifelse(geneMatrix.complete[,"KRAS"]==1,1,
                                                ifelse(geneMatrix.complete[,"BRAF"]==1,1,
                                                  ifelse(geneMatrix.complete[,"NRAS"]==1, 1, 0)))

#Remove individual KRAS, NRAS and BRAF columns 
geneMatrix.complete <- geneMatrix.complete[, !colnames(geneMatrix.complete) %in%  c("KRAS", "NRAS", "BRAF")]

39.2 Define Response matrix

Run linear model to get drug:stimulus:patient coefficients

#create a list of drug - cytokine combinations
combos <- expand.grid(thedrugs, thecytokines) %>% mutate(combination = paste(Var1, Var2, sep = ":")) %>% dplyr::select(combination)


#create object to store linear model fit
fit <- vector(mode = 'list', length = length(combos$combination))
names(fit) <- combos$combination

#create an object to store coefficients from fit
coefficients <- vector(mode = 'list', length = length(combos$combination))
names(coefficients) <- combos$combination

#define drugs and cytokines to run the model for
for(x in thedrugs){
  for(y in thecytokines){
    
    #get data for given drug and stimulis, and matching pateints to feature matrix
    modelTab <- dplyr::filter(df, 
                       PatientID %in% rownames(geneMatrix.complete), 
                       treatment_drug%in% c(x, "DMSO"), 
                       treatment_cytokine %in% c(y, "No Cytokine") )
    
    #define the base level per treatment_type as the "no"-treatment
    modelTab$treatment_drug <- as.factor(modelTab$treatment_drug)
    modelTab$treatment_cytokine <- as.factor(modelTab$treatment_cytokine)
    modelTab$PatientID <- as.factor(modelTab$PatientID)

    modelTab$treatment_cytokine %<>% relevel("No Cytokine")
    modelTab$treatment_drug %<>% relevel("DMSO")
    modelTab$PatientID %<>% relevel("Pat_001")
    
    # fit linear model, with interaction
    nam <- paste(x,y, sep = ":")
    fit.lm <- lm(Viability ~ treatment_drug * treatment_cytokine * PatientID, modelTab)
    fit[[nam]] <- fit.lm
    
    #extract coefficients and p values and put into a dataframe
    coeffdf <- summary(fit.lm)$coefficients[,1] %>% as.data.frame()

    #process coeffdf
    setDT(coeffdf, keep.rownames = TRUE)[]
    colnames(coeffdf) <- c("treatment", "beta")

    #filter out non-drug:cytokine:patient coefficients
    coeffdf <- dplyr::filter(coeffdf,
                      grepl('treatment_drug.*treatment_cytokine.*PatientID*', treatment))

      #remove treatment
      coeffdf <- coeffdf %>% 
        #remove treatment_drug string
        mutate_at(vars(treatment), list(~as.character(gsub("treatment_drug", "", .)))) %>%
        #remove treatment_cytokine string
        mutate_at(vars(treatment), list(~as.character(gsub("treatment_cytokine", "", .)))) %>%
        #remove PatientID string
        mutate_at(vars(treatment), list(~as.character(gsub("PatientID", "", .))))

      #split up into sperate drug, cytokine and patient columns, by colons
      coeffdf <- data.frame(coeffdf, do.call(rbind, strsplit(coeffdf$treatment, split = ":", fixed = TRUE)))

      #select columns of interest and rename
      coeffdf <- coeffdf[, c("beta", "X1", "X2", "X3")]
      colnames(coeffdf) <- c("beta","Drug", "Cytokine", "PatientID")
      
      #store in coefficients object 
      coefficients[[nam]] <- coeffdf
         
  }
}

Generate response matrix using lm coefficients

#bind together all coefficients for all drug:stimulus combinations
coefficients.all <- rbindlist(coefficients)

#make matrix
betaMatrix <- 
  #add column with drug and stimulus names
  dplyr:: mutate(coefficients.all, 
                 drugCytokine = paste0(Drug," + ",Cytokine)) %>%
  #remove single treatment columns 
  dplyr::select(-Cytokine, -Drug) %>%
  
  spread(key = PatientID, value = beta) %>% 
  data.frame() %>% 
  remove_rownames() %>%
  column_to_rownames("drugCytokine")

#make sure the sample order is the same as the geneMatrix
geneMatrix.complete <- geneMatrix.complete[ rownames(geneMatrix.complete) != "Pat_001",]
betaMatrix <- betaMatrix[,rownames(geneMatrix.complete)]

#check there are no NAs 
which(is.na(betaMatrix))
## integer(0)
which(betaMatrix == 0)
## integer(0)

39.3 Run lasso - regularised regression

#define object to hold outputs from runGlm()
dataResult <- list()

#for each drug + stimulus combination:
for (x in rownames(betaMatrix)){ 
  
      #prepare input and response matrices
      y <- unlist(betaMatrix[x,])
      X <- geneMatrix.complete
    
      #fit the model and find optimal value of lamba
      cvglmfit <- runGlm(X, y, method="lasso", repeats=30, folds=3)
      dataResult[[x]] <- cvglmfit

}

40 Figures

40.1 Figure 6A

Predictor Profile of the size of interaction between Fludarabine and CpG ODN
Generate all predictor profiles

#run lassoPlot function to generate predictor profiles, with no minimum coefficient value
heatMaps <- lassoPlot(dataResult, geneMatrix.complete, betaMatrix, freqCut = 0.9, coefCut = 0.0)

heatMaps <- heatMaps[!is.na(heatMaps)]

Plot predictor profile for Fludarabine + CpG ODN

Fig6A <- grid.arrange(grobs = heatMaps["Fludarabine + CpG ODN"], ncol = 1)

Fig6A
## TableGrob (1 x 1) "arrange": 1 grobs
##                       z     cells    name           grob
## Fludarabine + CpG ODN 1 (1-1,1-1) arrange gtable[layout]

40.2 Figure 6B

Heatmap of coefficients

Here we fit linear models to the viability matrix, for each drug - stimulus combination and extract interaction coefficients for each patient to generate a response matrix (n = 137). To generate the feature matrix, genetic mutations and CNVs (p= 39) and IGHV status (coded as 0-1) are used. Genetic features with >20% missing values were excluded, and only patients with complete annotation are included in the model. We run multivariate regression using a Gaussian linear model with L1-penalty using glmnet, 3-fold cross-validation and misclassification error as loss.

#Set up vector to hold coeff values
barValues <- vector(mode="list", length=length(dataResult))
names(barValues) <- names(dataResult)

#Set cut offs  
coefCut <- 0.0 #no minimum value of coefficient
freqCut <- 0.9 #coefficient must be selected in 90% of bootstrapped repeats

lassoOut <- dataResult

#for each drug - stimulus combination
for (seaName in names(lassoOut)) { 
    
    #get the result from DataResult for given drug:cytokine interaction, extract coefficient matrix and find row means 
    barValue <- rowMeans(lassoOut[[seaName]]$coefMat)
    
    #check number of occurrences of each coefficient in bootstrapped repeats
    freqValue <- rowMeans(abs(sign(lassoOut[[seaName]]$coefMat)))
    
    for(nam in names(barValue)){
      #add NA if coefficient value is below thresholds
      if(abs(barValue[nam]) < coefCut | freqValue[nam] < freqCut) {
          barValue[nam] <- NA }
    }  
    #add set of coefficients to list
    barValues[[seaName]] <- barValue
}

#bind coefficients together for all interactions
coeff.mat <- do.call(rbind,barValues)
   
#remove rows where all values are 0
coeff.mat <- coeff.mat[ rowSums(!is.na(coeff.mat)) > 0,]

#remove columns where all values are 0
coeff.mat <- coeff.mat[ ,colSums(!is.na(coeff.mat)) > 0]

#make any NAs = 0 
coeff.mat[is.na(coeff.mat)] <- 0

#cluster coeff.mat
fit <-
  coeff.mat %>% 
  dist() %>% 
  hclust()
  
order_comb <- rownames(coeff.mat)[fit$order]

#Put matrix into long format for ggplot
coeff.long.mat <- coeff.mat %>% 
  as.data.frame() %>% 
  tibble::rownames_to_column("int") %>% 
  tidyr::gather(key = "gene", value = "coeff", -int)

#Check number of combinations affected by all genetic alterations
coeff.long.mat %>% 
  dplyr::filter(coeff!=0) %>%
  dplyr::group_by(int) %>% 
  dplyr::summarize()
## # A tibble: 60 x 1
##    int                     
##    <chr>                   
##  1 BAY-11-7085 + CpG ODN   
##  2 BAY-11-7085 + Resiquimod
##  3 BAY-11-7085 + TGF-b1    
##  4 Everolimus + CpG ODN    
##  5 Everolimus + IL-4       
##  6 Everolimus + Resiquimod 
##  7 Everolimus + sCD40L+IL-4
##  8 Everolimus + TGF-b1     
##  9 Fludarabine + CpG ODN   
## 10 Fludarabine + IL-21     
## # … with 50 more rows
coeff.long.mat %>% 
  dplyr::filter(coeff!=0) %>%
  dplyr::group_by(gene) %>% 
  dplyr::count() %>% 
  dplyr::arrange(desc(n))
## # A tibble: 14 x 2
## # Groups:   gene [14]
##    gene                     n
##    <chr>                <int>
##  1 "IGHV.status"           45
##  2 "trisomy12"             29
##  3 "del13q"                27
##  4 "TP53"                  13
##  5 "del11q"                12
##  6 "SF3B1"                 11
##  7 "ATM"                    6
##  8 "del17p"                 5
##  9 "gain8q"                 5
## 10 "NOTCH1"                 4
## 11 "KRAS,\nNRAS,\nBRAF"     2
## 12 "del6q"                  1
## 13 "EGR2"                   1
## 14 "NFKBIE"                 1
#get top 8 coefficients 
order_alt <- 
  coeff.long.mat %>% 
  dplyr::filter(coeff!=0) %>% 
  dplyr::group_by(gene) %>% 
  dplyr::count(sort=T) %>% 
  dplyr::select(gene) %>% 
  unlist() %>%
  .[1:8] 

#Number of combinations affected by Top 8 genetic alterations
coeff.long.mat %>% 
  dplyr::filter(coeff!=0, gene%in%order_alt) %>%
  dplyr::group_by(int) %>% 
  dplyr::summarize()
## # A tibble: 60 x 1
##    int                     
##    <chr>                   
##  1 BAY-11-7085 + CpG ODN   
##  2 BAY-11-7085 + Resiquimod
##  3 BAY-11-7085 + TGF-b1    
##  4 Everolimus + CpG ODN    
##  5 Everolimus + IL-4       
##  6 Everolimus + Resiquimod 
##  7 Everolimus + sCD40L+IL-4
##  8 Everolimus + TGF-b1     
##  9 Fludarabine + CpG ODN   
## 10 Fludarabine + IL-21     
## # … with 50 more rows
#Plot heatmap
coeff.long.mat.ordered <-  
  coeff.long.mat %>%
  dplyr::filter(gene %in% order_alt, coeff!=0) %>% 
  dplyr::mutate(gene = factor(gene, levels = order_alt), int = factor(int, levels = rev(order_comb)))  %>% 
  mutate(direction = ifelse(coeff>0, "Positive", "Negative")) %>% 
  separate(int, sep=" \\+ ", into=c("Drug", "Stimulus"), remove = FALSE)

Order_Stim <- 
  coeff.long.mat.ordered %>% 
  dplyr::filter(coeff!=0) %>% 
  dplyr::group_by(Stimulus) %>% 
  dplyr::count(sort=T) %>% 
  dplyr::select(Stimulus) %>% 
  unlist()

Order_Drug <- 
  coeff.long.mat.ordered %>% 
  dplyr::filter(coeff!=0) %>% 
  dplyr::group_by(Drug) %>% 
  dplyr::count(sort=T) %>% 
  dplyr::select(Drug) %>% 
  unlist() %>% 
  rev()

coeff.long.mat.ordered <-
  coeff.long.mat.ordered %>%
  mutate(Stimulus=factor(Stimulus, levels = Order_Stim), 
         Drug=factor(Drug, levels = Order_Drug))
  

cyt_labels = c("TGF-b1"="TGF-\u03B21", "sCD40L+IL-4"="sCD40L + IL4", "IL-1b"="IL1\u03B2", "IL-4"="IL4", "IL-6"="IL6","IL-15"="IL15","IL-10"="IL10", "IL-21"="IL21","IL-2"="IL2", "Interferon gamma"= "Interferon \u03B3", "SDF-1a"="SDF-1\u03B1", "CpG ODN"="CpG ODN", "Resiquimod"="Resiquimod", "sCD40L"="sCD40L", "HS-5 CM"="HS-5 CM", "soluble anti-IgM"="soluble anti-IgM", "BAFF"="BAFF")

gene_labels = c("IGHV.status" = "IGHV status", "del11q" = "del(11q)", "del13q" = "del(13q)", "del17p" = "del(17p)", "trisomy12" = "trisomy 12", "TP53" = "TP53", "SF3B1" = "SF3B1", "ATM" = "ATM")


Fig6B <- 
  coeff.long.mat.ordered %>% 
  ggplot(aes(y=Drug, x=gene)) +
  geom_tile(aes(fill=coeff),color = "white") +
  scale_fill_gradientn(colors=c(rep(palblues[1:4],each=2),
                                "white", 
                                rep(palreds[5:8], each=2)),  
                       limits=c(-0.8,.8)) +
  t1 +
  theme(axis.text.x = element_text(angle = 35, hjust = 1, vjust = 1),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.ticks = element_blank(),
        axis.ticks.x =element_blank(),
        panel.background = element_rect(color = "black", fill=NA), 
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        legend.key.height=unit(0.5, "cm"),
        legend.key.width=unit(2, "cm"),
        legend.title = element_text(face='bold', hjust = 0, size=fontsize+2),
        legend.position = "top",
        legend.key = element_blank(),
        legend.text = element_text(size=fontsize),
        legend.background = element_rect(color = NA),
        strip.text.y.left = element_text(size=22, angle = 0, face="bold"),
        strip.background = element_blank())+
  
  
  labs(fill = expression(beta["int"]))+
  scale_y_discrete(position = "right")+
  scale_x_discrete(labels = gene_labels) +
  facet_grid(Stimulus~., scales = "free_y",  space="free_y", switch = "both", labeller =labeller( Stimulus=cyt_labels))
  
Fig6B

40.3 Figure 6C

Fludarabine + CpG ODN viability values, faceted by IGHV status and trisomy 12 status

#set facet labels 
tri12.labs <- c("0" = "Non-\ntrisomy 12", "1" = "trisomy 12")
ighv.labs <- c("U" = "IGHV-U", "M" =  "IGHV-M")

#join viability and genetic data tables
df.patMeta <- left_join(df, patMeta, by = "PatientID")

Fig6C <-

  df.patMeta %>% 
  #filter for drug:stimulus combinations of interest, make sure no NAs in genetic data
  dplyr::filter(DCK%in%c("DMSO:CpG ODN","Fludarabine:CpG ODN","Fludarabine:No Cytokine"),
                !is.na(trisomy12),
                !is.na(IGHV.status)) %>%
    mutate(DCK=factor(DCK, levels=c("DMSO:CpG ODN","Fludarabine:No Cytokine","Fludarabine:CpG ODN"))) %>% 
  
  #plot treatment combination against viability 
  ggplot(aes(x = DCK,y = Viability,color= DCK))+
  geom_hline(yintercept = 0)+
  geom_boxplot()+
  geom_beeswarm(cex=1.5) +
  guides(color="none", shape="none")+
  #add p values
  stat_compare_means(method = "t.test",
                     label.y.npc = 0.8, 
                     paired = TRUE, 
                     comparisons = list(c(1,3), c(2,3)),
                     step.increase=0.2, 
                     size=6) +
  xlab("") +
  ylab("Log(Viability)") +
  #facet by trisomy 12 and IGHV status
  facet_grid(vars(trisomy12), 
             vars(IGHV.status),
             labeller = labeller(trisomy12 = tri12.labs, IGHV.status = ighv.labs))+
  
  scale_x_discrete(labels=c("DMSO:CpG ODN"="CpG ODN",
                            "Fludarabine:No Cytokine"="Fludarabine",
                            "Fludarabine:CpG ODN"="Fludarabine \n+ CpG ODN "))+  
    
  scale_color_manual(values=c(colors[4], colors[5],colors[3])) + 
  scale_y_continuous(expand = expansion(mult = c(0, 0.1)))+
  t2 +
  theme(strip.background =element_rect(fill=NA),
        strip.text = element_text(size=fontsize+4, face="bold"),
        strip.text.y = element_text(angle = 0),
        axis.text.x = element_text(size=fontsize+4, angle = 35, hjust = 1, vjust = 1))
 
Fig6C 

40.4 Figure 6D

Fig6D <-

  df.patMeta %>% 
  #filter for drug:stimulus combinations of interest, make sure no NAs in genetic data
  dplyr::filter(DCK%in%c("DMSO:IL-4","Ibrutinib:IL-4","Ibrutinib:No Cytokine"),
                !is.na(trisomy12),
                !is.na(IGHV.status)) %>%
     mutate(DCK = factor(DCK, levels=c("DMSO:IL-4",
                                      "Ibrutinib:No Cytokine",
                                      "Ibrutinib:IL-4"))) %>%
  
  #plot treatment combination against viability 
  ggplot(aes(x = DCK,y = Viability,color= DCK))+
  geom_hline(yintercept = 0)+
  geom_boxplot()+
  geom_beeswarm(cex=1.5) +
  guides(color="none", shape="none")+
  #add p values
  stat_compare_means(method = "t.test",
                     label.y.npc = 0.8, 
                     paired = TRUE, 
                     comparisons = list(c(1,3), c(2,3)),
                     step.increase=0.2, 
                     size=6) +
  xlab("") +
  ylab("Log(Viability)") +
  #facet by trisomy 12 and IGHV status
  facet_grid(vars(trisomy12), 
             vars(IGHV.status),
             labeller = labeller(trisomy12 = tri12.labs, IGHV.status = ighv.labs))+
  
  scale_x_discrete(labels=c("DMSO:IL-4"="IL4",
                            "Ibrutinib:No Cytokine"="Ibrutinib",
                            "Ibrutinib:IL-4"="Ibrutinib \n+ IL4"))+  
    
  scale_color_manual(values=c(colors[4], colors[5], colors[3])) + 
  scale_y_continuous(expand = expansion(mult = c(0, 0.1)))+
  t2 +
  theme(strip.background =element_rect(fill=NA),
        strip.text = element_text(size=fontsize+4, face="bold"),
        strip.text.y = element_text(angle = 0),
        axis.text.x = element_text(size=fontsize+4, angle = 35, hjust = 1, vjust = 1))
 
Fig6D 

41 Save data

Save for use in supplementary script

Lasso_Plots_Fig6 <- heatMaps
coeff.long.mat_Fig6 <- coeff.long.mat

save(Lasso_Plots_Fig6, 
     coeff.long.mat_Fig6, 
     file = "../../data/fig6_data_for_supplement.RData")

42 Assemble Figure

#Design
design1<-"
  AB
  CB
  DB
"

#Tag Theme
tp <- theme(plot.tag=element_text(size = 30))

# Plot

(wrap_elements(Fig6A)) + tp +

Fig6B + tp +
  
Fig6C+ tp +
  
Fig6D + tp +  
  
plot_annotation(tag_levels = "A") + 

plot_layout(design=design1, heights = c(0.9,1,1))

43 Appendix

Sys.info()
##                                                                                             sysname 
##                                                                                            "Darwin" 
##                                                                                             release 
##                                                                                            "19.6.0" 
##                                                                                             version 
## "Darwin Kernel Version 19.6.0: Thu May  6 00:48:39 PDT 2021; root:xnu-6153.141.33~1/RELEASE_X86_64" 
##                                                                                            nodename 
##                                               "mac-huber20.academic.christs.cam.ac.uk.pool.embl.de" 
##                                                                                             machine 
##                                                                                            "x86_64" 
##                                                                                               login 
##                                                                                             "holly" 
##                                                                                                user 
##                                                                                             "holly" 
##                                                                                      effective_user 
##                                                                                             "holly"
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
##  [1] grid      parallel  stats4    stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] forcats_0.5.1                          
##  [2] stringr_1.4.0                          
##  [3] dplyr_1.0.7                            
##  [4] purrr_0.3.4                            
##  [5] readr_1.4.0                            
##  [6] tibble_3.1.2                           
##  [7] tidyverse_1.3.1                        
##  [8] tidyr_1.1.3                            
##  [9] rlang_0.4.11                           
## [10] maxstat_0.7-25                         
## [11] data.table_1.14.0                      
## [12] png_0.1-7                              
## [13] formattable_0.2.1                      
## [14] broom_0.7.8                            
## [15] scales_1.1.1                           
## [16] gtable_0.3.0                           
## [17] genomation_1.24.0                      
## [18] ChIPseeker_1.28.3                      
## [19] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2
## [20] GenomicFeatures_1.44.0                 
## [21] msigdbr_7.4.1                          
## [22] org.Hs.eg.db_3.13.0                    
## [23] AnnotationDbi_1.54.1                   
## [24] clusterProfiler_4.0.0                  
## [25] ConsensusClusterPlus_1.56.0            
## [26] glmnet_4.1-2                           
## [27] Matrix_1.3-4                           
## [28] survminer_0.4.9                        
## [29] ggpubr_0.4.0                           
## [30] DESeq2_1.32.0                          
## [31] SummarizedExperiment_1.22.0            
## [32] Biobase_2.52.0                         
## [33] MatrixGenerics_1.4.0                   
## [34] matrixStats_0.59.0                     
## [35] GenomicRanges_1.44.0                   
## [36] GenomeInfoDb_1.28.1                    
## [37] IRanges_2.26.0                         
## [38] S4Vectors_0.30.0                       
## [39] BiocGenerics_0.38.0                    
## [40] ggfortify_0.4.11                       
## [41] pheatmap_1.0.12                        
## [42] magrittr_2.0.1                         
## [43] ggbeeswarm_0.6.0                       
## [44] ggrepel_0.9.1                          
## [45] RColorBrewer_1.1-2                     
## [46] cowplot_1.1.1                          
## [47] patchwork_1.1.1                        
## [48] magick_2.7.2                           
## [49] gridExtra_2.3                          
## [50] Hmisc_4.5-0                            
## [51] ggplot2_3.3.5                          
## [52] Formula_1.2-4                          
## [53] survival_3.2-11                        
## [54] lattice_0.20-44                        
## [55] plyr_1.8.6                             
## [56] BiocStyle_2.20.2                       
## 
## loaded via a namespace (and not attached):
##   [1] rappdirs_0.3.3           rtracklayer_1.52.0       exactRankTests_0.8-32   
##   [4] bit64_4.0.5              knitr_1.33               DelayedArray_0.18.0     
##   [7] rpart_4.1-15             KEGGREST_1.32.0          RCurl_1.98-1.3          
##  [10] generics_0.1.0           RSQLite_2.2.7            shadowtext_0.0.8        
##  [13] bit_4.0.4                enrichplot_1.12.2        lubridate_1.7.10        
##  [16] xml2_1.3.2               assertthat_0.2.1         viridis_0.6.1           
##  [19] xfun_0.24                hms_1.1.0                babelgene_21.4          
##  [22] evaluate_0.14            fansi_0.5.0              restfulr_0.0.13         
##  [25] progress_1.2.2           caTools_1.18.2           dbplyr_2.1.1            
##  [28] readxl_1.3.1             km.ci_0.5-2              igraph_1.2.6            
##  [31] DBI_1.1.1                geneplotter_1.70.0       htmlwidgets_1.5.3       
##  [34] ellipsis_0.3.2           crosstalk_1.1.1          backports_1.2.1         
##  [37] bookdown_0.22.3          markdown_1.1             annotate_1.70.0         
##  [40] gridBase_0.4-7           biomaRt_2.48.2           vctrs_0.3.8             
##  [43] abind_1.4-5              cachem_1.0.5             withr_2.4.2             
##  [46] ggforce_0.3.3            BSgenome_1.60.0          checkmate_2.0.0         
##  [49] GenomicAlignments_1.28.0 treeio_1.16.1            prettyunits_1.1.1       
##  [52] cluster_2.1.2            DOSE_3.18.1              mltools_0.3.5           
##  [55] ape_5.5                  lazyeval_0.2.2           crayon_1.4.1            
##  [58] genefilter_1.74.0        labeling_0.4.2           pkgconfig_2.0.3         
##  [61] tweenr_1.0.2             nlme_3.1-152             vipor_0.4.5             
##  [64] nnet_7.3-16              lifecycle_1.0.0          downloader_0.4          
##  [67] filelock_1.0.2           BiocFileCache_2.0.0      modelr_0.1.8            
##  [70] seqPattern_1.24.0        cellranger_1.1.0         polyclip_1.10-0         
##  [73] aplot_0.0.6              KMsurv_0.1-5             carData_3.0-4           
##  [76] boot_1.3-28              zoo_1.8-9                reprex_2.0.0            
##  [79] base64enc_0.1-3          beeswarm_0.4.0           viridisLite_0.4.0       
##  [82] rjson_0.2.20             bitops_1.0-7             KernSmooth_2.23-20      
##  [85] Biostrings_2.60.1        blob_1.2.1               shape_1.4.6             
##  [88] pdftools_3.0.1           qvalue_2.24.0            qpdf_1.1                
##  [91] jpeg_0.1-8.1             rstatix_0.7.0            ggsignif_0.6.2          
##  [94] memoise_2.0.0            gplots_3.1.1             zlibbioc_1.38.0         
##  [97] compiler_4.1.0           scatterpie_0.1.6         BiocIO_1.2.0            
## [100] plotrix_3.8-1            cli_3.0.0                Rsamtools_2.8.0         
## [103] XVector_0.32.0           htmlTable_2.2.1          MASS_7.3-54             
## [106] tidyselect_1.1.1         stringi_1.6.2            highr_0.9               
## [109] yaml_2.2.1               GOSemSim_2.18.0          askpass_1.1             
## [112] locfit_1.5-9.4           latticeExtra_0.6-29      survMisc_0.5.5          
## [115] fastmatch_1.1-0          tools_4.1.0              rio_0.5.27              
## [118] rstudioapi_0.13          foreach_1.5.1            foreign_0.8-81          
## [121] farver_2.1.0             ggraph_2.0.5             digest_0.6.27           
## [124] rvcheck_0.1.8            BiocManager_1.30.16      ggtext_0.1.1            
## [127] gridtext_0.1.4           Rcpp_1.0.6               car_3.0-11              
## [130] httr_1.4.2               colorspace_2.0-2         rvest_1.0.0             
## [133] fs_1.5.0                 XML_3.99-0.6             splines_4.1.0           
## [136] tidytree_0.3.4           graphlayouts_0.7.1       xtable_1.8-4            
## [139] jsonlite_1.7.2           ggtree_3.0.2             tidygraph_1.2.0         
## [142] lemon_0.4.5              R6_2.5.0                 pillar_1.6.1            
## [145] htmltools_0.5.1.1        DT_0.18                  glue_1.4.2              
## [148] fastmap_1.1.0            BiocParallel_1.26.1      codetools_0.2-18        
## [151] fgsea_1.18.0             mvtnorm_1.1-2            utf8_1.2.1              
## [154] curl_4.3.2               gtools_3.9.2             zip_2.2.0               
## [157] GO.db_3.13.0             openxlsx_4.2.4           rmarkdown_2.9           
## [160] munsell_0.5.0            DO.db_2.9                GenomeInfoDbData_1.2.6  
## [163] iterators_1.0.13         impute_1.66.0            haven_2.4.1             
## [166] reshape2_1.4.4


In this sub-vignette we present the analysis and source code for figure 7. This sub-vignette can be built along with all other sub-vignettes by running CLLCytokineScreen2021.Rmd.

44 Set up

Load libraries

library(patchwork)
library(maxstat)
library(survminer)
library(survival)
library(ggpubr)
library(rlang)
library(ggbeeswarm)
library(dplyr)

Set plot directory

plotDir = ifelse(exists(".standalone"), "", "../../inst/figs/")
if(plotDir!="") if(!file.exists(plotDir)) dir.create(plotDir)

45 Load data

Raw

#ihc_patient_data: 596 x 7 tibble containing IHC staining intensities forpIRAK4, pSTAT6 and STAT6 staining in CLL and healthy tissue 
load( "../../data/ihc_patient_data.RData")

#ihc_surv_data: 200 x 15 tibble containing IHC staining intensities for pIRAK4, pSTAT6 and STAT6 staining in CLL and healthy tissue, along with meta data (including clinical outcomes) on each patient sample
load( "../../data/ihc_surv_data.RData")
#ihc_patient_data: 596 x 7 tibble containing IHC staining intensities forpIRAK4, pSTAT6 and STAT6 staining in CLL and healthy tissue 
ihc_patient_data <- read.table(file= "../../inst/extdata/ihc_patient_data.txt",header = TRUE) %>% as_tibble() %>% dplyr::group_by(PatientID)

#ihc_surv_data: 200 x 15 tibble containing IHC staining intensities for pIRAK4, pSTAT6 and STAT6 staining in CLL and healthy tissue, along with meta data (including clinical outcomes) on each patient sample
ihc_surv_data <- read.table(file= "../../inst/extdata/ihc_surv_data.txt",header = TRUE) %>% as_tibble() %>% dplyr::group_by(PatientID)

Process

#convert stain type to factor
ihc_patient_data$Stain <- factor(ihc_patient_data$Stain, levels = c("pSTAT6", "STAT6" , "pIRAK4"))

46 Set aesthetics

source("../../R/themes_colors.R")

47 Plot Figures

47.1 Figure 7A

Beeswarm-boxplot of mean staining intensities of pSTAT6, for healthy and CLL LN samples

Fig7A <-
  ihc_patient_data %>%
  #get staining intensities for pSTAT6 only
  dplyr::filter(Stain %in% c("pSTAT6")) %>%
  #plot staining intensity, stratified by CLL / Healthy LN
  ggplot(aes(x = Tissue, y = Intensity)) +
  geom_boxplot() +
  geom_beeswarm(aes(color=Tissue), alpha=1, cex=2) +
  #add p values
  stat_compare_means(method = "t.test", comparisons=list(c(1,2)), size=6) +
  scale_color_manual(values = c(palreds[8], palblues[2])) +
  xlab("") +
  ylab("Mean pSTAT6 Staining Intensity") +
  guides(color = "none") +
  scale_x_discrete(labels = c( "CLL"="CLL-infiltrated \nlymph nodes", "LK"="Non-neoplastic \nlymph nodes")) +
  coord_cartesian(clip = "off") +
  #add preset theme 2
  t2

Fig7A

47.2 Figure 7B

Beeswarm-boxplot of mean staining intensities of pIRAK4, for healthy and CLL LN samples

Fig7B <-
  ihc_patient_data %>%
  #filter for pIRAK4 staining intensities only
  dplyr::filter(Stain%in%c("pIRAK4")) %>%
  #plot staining intensity, stratified by CLL / Healthy LN
  ggplot(aes(x=Tissue, y=Intensity)) +
  geom_boxplot() +
  geom_beeswarm(aes(color=Tissue), alpha=1, cex=2) +
  #add p values 
  stat_compare_means(method = "t.test", comparisons=list(c(1,2)), size=6) +
  scale_color_manual(values = c(palreds[8], palblues[2])) +
  xlab("") +
  ylab("Mean pIRAK4 Staining Intensity") +
  guides(color="none") +
  scale_x_discrete(labels = c( "CLL"="CLL-infiltrated \nlymph nodes", "LK"="Non-neoplastic \nlymph nodes")) +
  coord_cartesian(clip = "off") +
  #add preset theme 2
  t2



Fig7B

47.3 Figure 7C

Image of IHC cross-section of CLL-infiltrated LN stained for pSTAT6

Fig7C <-cowplot::ggdraw() +
  cowplot::draw_image( "../../inst/images/CLL1_I3_pSTAT6.png", scale = 1)

Fig7C

47.4 Figure 7D

Image of IHC cross-section of healthy LN stained for pSTAT6

##read image
Fig7D <- cowplot::ggdraw() + 
  cowplot::draw_image("../../inst/images/LK2_B4_pSTAT6.png", scale = 1)

Fig7D

47.5 Figure 7E

Image of IHC cross-section of CLL-infiltrated LN stained for pIRAK4

Fig7E <- cowplot::ggdraw() + 
  cowplot::draw_image( "../../inst/images/CLL1_I3_pIRAK4.png", scale = 1)

Fig7E

47.6 Figure 7F

Image of IHC cross-section of healthy LN stained for pIRAK4

Fig7F <- cowplot::ggdraw() + 
  cowplot::draw_image( "../../inst/images/LK2_B4_pIRAK4.png", scale = 1)

Fig7F

47.7 Figure 7G

Kaplan-Meier curve to show associations of pSTAT6 levels with treatment free survival.
In Figures 7G and 7H the two pSTAT6 and pIRAK4 groups (high /low) were defined by mean staining intensities dichotomised using maximally selected rank statistics. The same 64 CLL lymph node samples were used for both Kaplan-Meier plots. 50 patient samples were in the high pSTAT6 group, and 52 in the high pIRAK4 group.

Prework for 7G and H

#Define stains for which want to visualize TTT
stains <- c("pSTAT6", "pIRAK4")


#Get optimal cut offs 
stats <- lapply(stains, function(stn){
 
  survival <- dplyr::select(ihc_surv_data, PatientID, Tissue, Diagnosis, Sex, TTT, treatedAfter, stn)
  colnames(survival) <- c("PatientID", "Tissue", "Diagnosis", "Sex", "TTT", "treatedAfter", "target")
  
  #Run test to obtain cut off threshold for high pSTAT6 versus low pSTAT6

  maxtest <- maxstat.test(Surv(TTT, treatedAfter)~ target, 
                          data = survival,
                          smethod = "LogRank",
                          alpha = NULL)
  
  cutpoint <- maxtest$estimate
  
 })
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(stn)` instead of `stn` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
  names(stats) <- c("pSTAT6", "pIRAK4")
  
                                          
  #Annotate by cutoff point
  survdf <- mutate(ihc_surv_data, 
                   phosphoSTAT6 = ifelse(pSTAT6 < stats$pSTAT6, "low", "high"), 
                   phosphoIRAK4 = ifelse(pIRAK4 < stats$pIRAK4, "low", "high"))
  
                                      

  #fit survival models
  f_pstat6 <- survfit(Surv(TTT, treatedAfter) ~ phosphoSTAT6, data= survdf)
  f_pirak4 <- survfit(Surv(TTT, treatedAfter) ~ phosphoIRAK4, data= survdf)
  
  fits <- list(pSTAT6 = f_pstat6, pIRAK4 = f_pirak4)
  
  #Make plot
  gg = ggsurvplot_list(fits, 
                       survdf,  
                       pval=TRUE, 
                       palette=c(palreds[8],palreds[3]),  
                       risk.table = TRUE, legend.title = "", 
                       ggtheme = t2, 
                       legend.labs =list(c("High", "Low"),c("High", "Low")),  
                       xlab="Time in years", ylab="\nTreatment free survival", title= "", legend = "bottom")
#get plot for pSTAT6 stain
 Fig7G <-
  wrap_elements(gg$pSTAT6$plot + 
                  theme(axis.title.x = element_blank()) +
                  gg$pSTAT6$table + 
                  theme(plot.title = element_blank()) +
                  plot_layout(ncol = 1, heights = c(85, 15)))
  
Fig7G

47.8 Figure 7H

Kaplan-Meier curve to show associations of pSTAT6 levels with treatment free survival.

#get plot for pIRAK4 stain 
Fig7H <- wrap_elements(gg$pIRAK4$plot+ 
                         theme(axis.title.x = element_blank()) +
                         gg$pIRAK4$table + 
                         theme(plot.title = element_blank()) +
                  plot_layout(ncol=1, heights = c(85, 15))) 


Fig7H

48 Assemble Figure

design1 <-"
  ACG
  ADG
  BEH
  BFH
  "
tp <- theme(plot.tag = element_text(size = 30, vjust = 1))

Fig7 <-
  wrap_elements(Fig7A) + tp +
  wrap_elements(Fig7B) + tp +
  Fig7C + tp +
  Fig7D + tp +
  Fig7E + tp +
  Fig7F + tp +
  
  Fig7G + tp +
  
  Fig7H + tp +
  
  plot_layout(design = design1, widths = c(1,0.7,1), heights =c(0.8, 1, 0.8, 1))+
  plot_annotation(tag_levels = "A")
  
Fig7

49 Count tables

ihc_surv_data %>% 
  ungroup() %>% 
  mutate(TFT = ifelse(is.na(TFT)|TFT == "NA", "NA", "Available" )) %>% 
  dplyr::group_by(Diagnosis, TFT) %>% 
  count()
## # A tibble: 3 x 3
## # Groups:   Diagnosis, TFT [3]
##   Diagnosis TFT           n
##   <chr>     <chr>     <int>
## 1 CLL       Available    64
## 2 CLL       NA           36
## 3 healthy   NA          100

50 Appendix

Sys.info()
##                                                                                             sysname 
##                                                                                            "Darwin" 
##                                                                                             release 
##                                                                                            "19.6.0" 
##                                                                                             version 
## "Darwin Kernel Version 19.6.0: Thu May  6 00:48:39 PDT 2021; root:xnu-6153.141.33~1/RELEASE_X86_64" 
##                                                                                            nodename 
##                                               "mac-huber20.academic.christs.cam.ac.uk.pool.embl.de" 
##                                                                                             machine 
##                                                                                            "x86_64" 
##                                                                                               login 
##                                                                                             "holly" 
##                                                                                                user 
##                                                                                             "holly" 
##                                                                                      effective_user 
##                                                                                             "holly"
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
##  [1] grid      parallel  stats4    stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] forcats_0.5.1                          
##  [2] stringr_1.4.0                          
##  [3] dplyr_1.0.7                            
##  [4] purrr_0.3.4                            
##  [5] readr_1.4.0                            
##  [6] tibble_3.1.2                           
##  [7] tidyverse_1.3.1                        
##  [8] tidyr_1.1.3                            
##  [9] rlang_0.4.11                           
## [10] maxstat_0.7-25                         
## [11] data.table_1.14.0                      
## [12] png_0.1-7                              
## [13] formattable_0.2.1                      
## [14] broom_0.7.8                            
## [15] scales_1.1.1                           
## [16] gtable_0.3.0                           
## [17] genomation_1.24.0                      
## [18] ChIPseeker_1.28.3                      
## [19] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2
## [20] GenomicFeatures_1.44.0                 
## [21] msigdbr_7.4.1                          
## [22] org.Hs.eg.db_3.13.0                    
## [23] AnnotationDbi_1.54.1                   
## [24] clusterProfiler_4.0.0                  
## [25] ConsensusClusterPlus_1.56.0            
## [26] glmnet_4.1-2                           
## [27] Matrix_1.3-4                           
## [28] survminer_0.4.9                        
## [29] ggpubr_0.4.0                           
## [30] DESeq2_1.32.0                          
## [31] SummarizedExperiment_1.22.0            
## [32] Biobase_2.52.0                         
## [33] MatrixGenerics_1.4.0                   
## [34] matrixStats_0.59.0                     
## [35] GenomicRanges_1.44.0                   
## [36] GenomeInfoDb_1.28.1                    
## [37] IRanges_2.26.0                         
## [38] S4Vectors_0.30.0                       
## [39] BiocGenerics_0.38.0                    
## [40] ggfortify_0.4.11                       
## [41] pheatmap_1.0.12                        
## [42] magrittr_2.0.1                         
## [43] ggbeeswarm_0.6.0                       
## [44] ggrepel_0.9.1                          
## [45] RColorBrewer_1.1-2                     
## [46] cowplot_1.1.1                          
## [47] patchwork_1.1.1                        
## [48] magick_2.7.2                           
## [49] gridExtra_2.3                          
## [50] Hmisc_4.5-0                            
## [51] ggplot2_3.3.5                          
## [52] Formula_1.2-4                          
## [53] survival_3.2-11                        
## [54] lattice_0.20-44                        
## [55] plyr_1.8.6                             
## [56] BiocStyle_2.20.2                       
## 
## loaded via a namespace (and not attached):
##   [1] rappdirs_0.3.3           rtracklayer_1.52.0       exactRankTests_0.8-32   
##   [4] bit64_4.0.5              knitr_1.33               DelayedArray_0.18.0     
##   [7] rpart_4.1-15             KEGGREST_1.32.0          RCurl_1.98-1.3          
##  [10] generics_0.1.0           RSQLite_2.2.7            shadowtext_0.0.8        
##  [13] bit_4.0.4                enrichplot_1.12.2        lubridate_1.7.10        
##  [16] xml2_1.3.2               assertthat_0.2.1         viridis_0.6.1           
##  [19] xfun_0.24                hms_1.1.0                babelgene_21.4          
##  [22] evaluate_0.14            fansi_0.5.0              restfulr_0.0.13         
##  [25] progress_1.2.2           caTools_1.18.2           dbplyr_2.1.1            
##  [28] readxl_1.3.1             km.ci_0.5-2              igraph_1.2.6            
##  [31] DBI_1.1.1                geneplotter_1.70.0       htmlwidgets_1.5.3       
##  [34] ellipsis_0.3.2           crosstalk_1.1.1          backports_1.2.1         
##  [37] bookdown_0.22.3          markdown_1.1             annotate_1.70.0         
##  [40] gridBase_0.4-7           biomaRt_2.48.2           vctrs_0.3.8             
##  [43] abind_1.4-5              cachem_1.0.5             withr_2.4.2             
##  [46] ggforce_0.3.3            BSgenome_1.60.0          checkmate_2.0.0         
##  [49] GenomicAlignments_1.28.0 treeio_1.16.1            prettyunits_1.1.1       
##  [52] cluster_2.1.2            DOSE_3.18.1              mltools_0.3.5           
##  [55] ape_5.5                  lazyeval_0.2.2           crayon_1.4.1            
##  [58] genefilter_1.74.0        labeling_0.4.2           pkgconfig_2.0.3         
##  [61] tweenr_1.0.2             nlme_3.1-152             vipor_0.4.5             
##  [64] nnet_7.3-16              lifecycle_1.0.0          downloader_0.4          
##  [67] filelock_1.0.2           BiocFileCache_2.0.0      modelr_0.1.8            
##  [70] seqPattern_1.24.0        cellranger_1.1.0         polyclip_1.10-0         
##  [73] aplot_0.0.6              KMsurv_0.1-5             carData_3.0-4           
##  [76] boot_1.3-28              zoo_1.8-9                reprex_2.0.0            
##  [79] base64enc_0.1-3          beeswarm_0.4.0           viridisLite_0.4.0       
##  [82] rjson_0.2.20             bitops_1.0-7             KernSmooth_2.23-20      
##  [85] Biostrings_2.60.1        blob_1.2.1               shape_1.4.6             
##  [88] pdftools_3.0.1           qvalue_2.24.0            qpdf_1.1                
##  [91] jpeg_0.1-8.1             rstatix_0.7.0            ggsignif_0.6.2          
##  [94] memoise_2.0.0            gplots_3.1.1             zlibbioc_1.38.0         
##  [97] compiler_4.1.0           scatterpie_0.1.6         BiocIO_1.2.0            
## [100] plotrix_3.8-1            cli_3.0.0                Rsamtools_2.8.0         
## [103] XVector_0.32.0           htmlTable_2.2.1          MASS_7.3-54             
## [106] tidyselect_1.1.1         stringi_1.6.2            highr_0.9               
## [109] yaml_2.2.1               GOSemSim_2.18.0          askpass_1.1             
## [112] locfit_1.5-9.4           latticeExtra_0.6-29      survMisc_0.5.5          
## [115] fastmatch_1.1-0          tools_4.1.0              rio_0.5.27              
## [118] rstudioapi_0.13          foreach_1.5.1            foreign_0.8-81          
## [121] farver_2.1.0             ggraph_2.0.5             digest_0.6.27           
## [124] rvcheck_0.1.8            BiocManager_1.30.16      ggtext_0.1.1            
## [127] gridtext_0.1.4           Rcpp_1.0.6               car_3.0-11              
## [130] httr_1.4.2               colorspace_2.0-2         rvest_1.0.0             
## [133] fs_1.5.0                 XML_3.99-0.6             splines_4.1.0           
## [136] tidytree_0.3.4           graphlayouts_0.7.1       xtable_1.8-4            
## [139] jsonlite_1.7.2           ggtree_3.0.2             tidygraph_1.2.0         
## [142] lemon_0.4.5              R6_2.5.0                 pillar_1.6.1            
## [145] htmltools_0.5.1.1        DT_0.18                  glue_1.4.2              
## [148] fastmap_1.1.0            BiocParallel_1.26.1      codetools_0.2-18        
## [151] fgsea_1.18.0             mvtnorm_1.1-2            utf8_1.2.1              
## [154] curl_4.3.2               gtools_3.9.2             zip_2.2.0               
## [157] GO.db_3.13.0             openxlsx_4.2.4           rmarkdown_2.9           
## [160] munsell_0.5.0            DO.db_2.9                GenomeInfoDbData_1.2.6  
## [163] iterators_1.0.13         impute_1.66.0            haven_2.4.1             
## [166] reshape2_1.4.4

51 Appendix

sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
##  [1] grid      parallel  stats4    stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] forcats_0.5.1                          
##  [2] stringr_1.4.0                          
##  [3] dplyr_1.0.7                            
##  [4] purrr_0.3.4                            
##  [5] readr_1.4.0                            
##  [6] tibble_3.1.2                           
##  [7] tidyverse_1.3.1                        
##  [8] tidyr_1.1.3                            
##  [9] rlang_0.4.11                           
## [10] maxstat_0.7-25                         
## [11] data.table_1.14.0                      
## [12] png_0.1-7                              
## [13] formattable_0.2.1                      
## [14] broom_0.7.8                            
## [15] scales_1.1.1                           
## [16] gtable_0.3.0                           
## [17] genomation_1.24.0                      
## [18] ChIPseeker_1.28.3                      
## [19] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2
## [20] GenomicFeatures_1.44.0                 
## [21] msigdbr_7.4.1                          
## [22] org.Hs.eg.db_3.13.0                    
## [23] AnnotationDbi_1.54.1                   
## [24] clusterProfiler_4.0.0                  
## [25] ConsensusClusterPlus_1.56.0            
## [26] glmnet_4.1-2                           
## [27] Matrix_1.3-4                           
## [28] survminer_0.4.9                        
## [29] ggpubr_0.4.0                           
## [30] DESeq2_1.32.0                          
## [31] SummarizedExperiment_1.22.0            
## [32] Biobase_2.52.0                         
## [33] MatrixGenerics_1.4.0                   
## [34] matrixStats_0.59.0                     
## [35] GenomicRanges_1.44.0                   
## [36] GenomeInfoDb_1.28.1                    
## [37] IRanges_2.26.0                         
## [38] S4Vectors_0.30.0                       
## [39] BiocGenerics_0.38.0                    
## [40] ggfortify_0.4.11                       
## [41] pheatmap_1.0.12                        
## [42] magrittr_2.0.1                         
## [43] ggbeeswarm_0.6.0                       
## [44] ggrepel_0.9.1                          
## [45] RColorBrewer_1.1-2                     
## [46] cowplot_1.1.1                          
## [47] patchwork_1.1.1                        
## [48] magick_2.7.2                           
## [49] gridExtra_2.3                          
## [50] Hmisc_4.5-0                            
## [51] ggplot2_3.3.5                          
## [52] Formula_1.2-4                          
## [53] survival_3.2-11                        
## [54] lattice_0.20-44                        
## [55] plyr_1.8.6                             
## [56] BiocStyle_2.20.2                       
## 
## loaded via a namespace (and not attached):
##   [1] rappdirs_0.3.3           rtracklayer_1.52.0       exactRankTests_0.8-32   
##   [4] bit64_4.0.5              knitr_1.33               DelayedArray_0.18.0     
##   [7] rpart_4.1-15             KEGGREST_1.32.0          RCurl_1.98-1.3          
##  [10] generics_0.1.0           RSQLite_2.2.7            shadowtext_0.0.8        
##  [13] bit_4.0.4                enrichplot_1.12.2        lubridate_1.7.10        
##  [16] xml2_1.3.2               assertthat_0.2.1         viridis_0.6.1           
##  [19] xfun_0.24                hms_1.1.0                babelgene_21.4          
##  [22] evaluate_0.14            fansi_0.5.0              restfulr_0.0.13         
##  [25] progress_1.2.2           caTools_1.18.2           dbplyr_2.1.1            
##  [28] readxl_1.3.1             km.ci_0.5-2              igraph_1.2.6            
##  [31] DBI_1.1.1                geneplotter_1.70.0       htmlwidgets_1.5.3       
##  [34] ellipsis_0.3.2           crosstalk_1.1.1          backports_1.2.1         
##  [37] bookdown_0.22.3          markdown_1.1             annotate_1.70.0         
##  [40] gridBase_0.4-7           biomaRt_2.48.2           vctrs_0.3.8             
##  [43] abind_1.4-5              cachem_1.0.5             withr_2.4.2             
##  [46] ggforce_0.3.3            BSgenome_1.60.0          checkmate_2.0.0         
##  [49] GenomicAlignments_1.28.0 treeio_1.16.1            prettyunits_1.1.1       
##  [52] cluster_2.1.2            DOSE_3.18.1              mltools_0.3.5           
##  [55] ape_5.5                  lazyeval_0.2.2           crayon_1.4.1            
##  [58] genefilter_1.74.0        labeling_0.4.2           pkgconfig_2.0.3         
##  [61] tweenr_1.0.2             nlme_3.1-152             vipor_0.4.5             
##  [64] nnet_7.3-16              lifecycle_1.0.0          downloader_0.4          
##  [67] filelock_1.0.2           BiocFileCache_2.0.0      modelr_0.1.8            
##  [70] seqPattern_1.24.0        cellranger_1.1.0         polyclip_1.10-0         
##  [73] aplot_0.0.6              KMsurv_0.1-5             carData_3.0-4           
##  [76] boot_1.3-28              zoo_1.8-9                reprex_2.0.0            
##  [79] base64enc_0.1-3          beeswarm_0.4.0           viridisLite_0.4.0       
##  [82] rjson_0.2.20             bitops_1.0-7             KernSmooth_2.23-20      
##  [85] Biostrings_2.60.1        blob_1.2.1               shape_1.4.6             
##  [88] pdftools_3.0.1           qvalue_2.24.0            qpdf_1.1                
##  [91] jpeg_0.1-8.1             rstatix_0.7.0            ggsignif_0.6.2          
##  [94] memoise_2.0.0            gplots_3.1.1             zlibbioc_1.38.0         
##  [97] compiler_4.1.0           scatterpie_0.1.6         BiocIO_1.2.0            
## [100] plotrix_3.8-1            cli_3.0.0                Rsamtools_2.8.0         
## [103] XVector_0.32.0           htmlTable_2.2.1          MASS_7.3-54             
## [106] tidyselect_1.1.1         stringi_1.6.2            highr_0.9               
## [109] yaml_2.2.1               GOSemSim_2.18.0          askpass_1.1             
## [112] locfit_1.5-9.4           latticeExtra_0.6-29      survMisc_0.5.5          
## [115] fastmatch_1.1-0          tools_4.1.0              rio_0.5.27              
## [118] rstudioapi_0.13          foreach_1.5.1            foreign_0.8-81          
## [121] farver_2.1.0             ggraph_2.0.5             digest_0.6.27           
## [124] rvcheck_0.1.8            BiocManager_1.30.16      ggtext_0.1.1            
## [127] gridtext_0.1.4           Rcpp_1.0.6               car_3.0-11              
## [130] httr_1.4.2               colorspace_2.0-2         rvest_1.0.0             
## [133] fs_1.5.0                 XML_3.99-0.6             splines_4.1.0           
## [136] tidytree_0.3.4           graphlayouts_0.7.1       xtable_1.8-4            
## [139] jsonlite_1.7.2           ggtree_3.0.2             tidygraph_1.2.0         
## [142] lemon_0.4.5              R6_2.5.0                 pillar_1.6.1            
## [145] htmltools_0.5.1.1        DT_0.18                  glue_1.4.2              
## [148] fastmap_1.1.0            BiocParallel_1.26.1      codetools_0.2-18        
## [151] fgsea_1.18.0             mvtnorm_1.1-2            utf8_1.2.1              
## [154] curl_4.3.2               gtools_3.9.2             zip_2.2.0               
## [157] GO.db_3.13.0             openxlsx_4.2.4           rmarkdown_2.9           
## [160] munsell_0.5.0            DO.db_2.9                GenomeInfoDbData_1.2.6  
## [163] iterators_1.0.13         impute_1.66.0            haven_2.4.1             
## [166] reshape2_1.4.4